ovs/ovsdb/row.h

/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_ROW_H
#define OVSDB_ROW_H 1

#include <stddef.h>
#include <stdint.h>
#include "column.h"
#include "openvswitch/hmap.h"
#include "openvswitch/list.h"
#include "ovsdb-data.h"
#include "table.h"

struct ovsdb_column_set;

/* A weak reference.
 *
 * When a column in row A contains a weak reference to UUID of a row B this
 * constitutes a weak reference from A (the source) to B (the destination).
 *
 * Rows A and B may be in the same table or different tables.
 *
 * Weak references from a row to itself are allowed, but no "struct
 * ovsdb_weak_ref" structures are created for them.
 */
struct ovsdb_weak_ref {
    struct hmap_node dst_node;     /* In ovsdb_row's 'dst_refs' hmap. */
    struct ovs_list src_node;      /* In txn_row's 'deleted/added_refs'. */

    struct ovsdb_table *src_table; /* Source row table. */
    struct uuid src;               /* Source row uuid. */

    struct ovsdb_table *dst_table; /* Destination row table. */
    struct uuid dst;               /* Destination row uuid. */

    /* Source row's key-value pair that created this reference.
     * This information is needed in order to find and delete the reference
     * from the source row.  We need both key and value in order to avoid
     * accidential deletion of an updated data, i.e. if value in datum got
     * updated and the reference was created by the old value.
     * Storing column index in order to remove references from the correct
     * column.   'by_key' flag allows to distinguish 2 references in a corner
     * case where key and value are the same. */
    union ovsdb_atom key;
    union ovsdb_atom value;
    struct ovsdb_type type;        /* Datum type of the key-value pair. */
    unsigned int column_idx;       /* Row column index for this pair. */
    bool by_key;                   /* 'true' if reference is a 'key'. */
};

/* A row in a database table. */
struct ovsdb_row {
    struct hmap_node hmap_node;    /* Element in ovsdb_table's 'rows' hmap. */
    struct ovsdb_table *table;     /* Table to which this belongs. */
    struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */

    /* Weak references.  Updated and checked only at transaction commit. */
    struct hmap dst_refs;          /* Weak references to this row. */

    /* Number of strong refs to this row from other rows, in this table or
     * other tables, through 'uuid' columns that have a 'refTable' constraint
     * pointing to this table and a 'refType' of "strong".  A row with nonzero
     * 'n_refs' cannot be deleted.  Updated and checked only at transaction
     * commit. */
    size_t n_refs;

    /* One datum for each column (shash_count(&table->schema->columns)
     * elements). */
    struct ovsdb_datum fields[];

    /* Followed by table->schema->n_indexes "struct hmap_node"s.  In rows that
     * have have been committed as part of the database, the hmap_node with
     * index 'i' is contained in hmap table->indexes[i].  */
};

uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *);
struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *,
                                                const struct ovsdb_weak_ref *);
void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *);


struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *);
struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *);
struct ovsdb_row *ovsdb_row_datum_clone(const struct ovsdb_row *);
void ovsdb_row_destroy(struct ovsdb_row *);

uint32_t ovsdb_row_hash_columns(const struct ovsdb_row *,
                                const struct ovsdb_column_set *,
                                uint32_t basis);
bool ovsdb_row_equal_columns(const struct ovsdb_row *,
                             const struct ovsdb_row *,
                             const struct ovsdb_column_set *);
int ovsdb_row_compare_columns_3way(const struct ovsdb_row *,
                                   const struct ovsdb_row *,
                                   const struct ovsdb_column_set *);
struct ovsdb_error *ovsdb_row_update_columns(struct ovsdb_row *,
                                             const struct ovsdb_row *,
                                             const struct ovsdb_column_set *,
                                             bool xor);
void ovsdb_row_columns_to_string(const struct ovsdb_row *,
                                 const struct ovsdb_column_set *, struct ds *);
struct ovsdb_error *ovsdb_row_from_json(struct ovsdb_row *,
                                        const struct json *,
                                        struct ovsdb_symbol_table *,
                                        struct ovsdb_column_set *included,
                                        bool is_diff)
    OVS_WARN_UNUSED_RESULT;
struct json *ovsdb_row_to_json(const struct ovsdb_row *,
                               const struct ovsdb_column_set *include);
void ovsdb_row_to_string(const struct ovsdb_row *, struct ds *);

static inline const struct uuid *
ovsdb_row_get_uuid(const struct ovsdb_row *row)
{
    return &row->fields[OVSDB_COL_UUID].keys[0].uuid;
}

static inline struct uuid *
ovsdb_row_get_uuid_rw(struct ovsdb_row *row)
{
    ovsdb_datum_unshare(&row->fields[OVSDB_COL_UUID], &ovsdb_type_uuid);
    return &row->fields[OVSDB_COL_UUID].keys[0].uuid;
}

static inline const struct uuid *
ovsdb_row_get_version(const struct ovsdb_row *row)
{
    return &row->fields[OVSDB_COL_VERSION].keys[0].uuid;
}

static inline struct uuid *
ovsdb_row_get_version_rw(struct ovsdb_row *row)
{
    ovsdb_datum_unshare(&row->fields[OVSDB_COL_VERSION], &ovsdb_type_uuid);
    return &row->fields[OVSDB_COL_VERSION].keys[0].uuid;
}

static inline uint32_t
ovsdb_row_hash(const struct ovsdb_row *row)
{
    return uuid_hash(ovsdb_row_get_uuid(row));
}

/* Returns the offset in bytes from the start of an ovsdb_row for 'table' to
 * the hmap_node for the index numbered 'i'. */
static inline size_t
ovsdb_row_index_offset__(const struct ovsdb_table *table, size_t i)
{
    size_t n_fields = shash_count(&table->schema->columns);
    return (offsetof(struct ovsdb_row, fields)
            + n_fields * sizeof(struct ovsdb_datum)
            + i * sizeof(struct hmap_node));
}

/* Returns the hmap_node in 'row' for the index numbered 'i'. */
static inline struct hmap_node *
ovsdb_row_get_index_node(struct ovsdb_row *row, size_t i)
{
    return (void *) ((char *) row + ovsdb_row_index_offset__(row->table, i));
}

/* Returns the ovsdb_row given 'index_node', which is a pointer to that row's
 * hmap_node for the index numbered 'i' within 'table'. */
static inline struct ovsdb_row *
ovsdb_row_from_index_node(struct hmap_node *index_node,
                          const struct ovsdb_table *table, size_t i)
{
    return (void *) ((char *) index_node - ovsdb_row_index_offset__(table, i));
}

/* An unordered collection of rows. */
struct ovsdb_row_set {
    const struct ovsdb_row **rows;
    size_t n_rows, allocated_rows;
};

#define OVSDB_ROW_SET_INITIALIZER { NULL, 0, 0 }

void ovsdb_row_set_init(struct ovsdb_row_set *);
void ovsdb_row_set_destroy(struct ovsdb_row_set *);
void ovsdb_row_set_add_row(struct ovsdb_row_set *, const struct ovsdb_row *);

struct json *ovsdb_row_set_to_json(const struct ovsdb_row_set *,
                                   const struct ovsdb_column_set *);

void ovsdb_row_set_sort(struct ovsdb_row_set *,
                        const struct ovsdb_column_set *);

/* A hash table of rows.  A specified set of columns is used for hashing and
 * comparing rows.
 *
 * The row hash doesn't necessarily own its rows.  They may be owned by, for
 * example, an ovsdb_table. */
struct ovsdb_row_hash {
    struct hmap rows;
    struct ovsdb_column_set columns;
};

#define OVSDB_ROW_HASH_INITIALIZER(RH) \
    { HMAP_INITIALIZER(&(RH).rows), OVSDB_COLUMN_SET_INITIALIZER }

struct ovsdb_row_hash_node {
    struct hmap_node hmap_node;
    const struct ovsdb_row *row;
};

void ovsdb_row_hash_init(struct ovsdb_row_hash *,
                         const struct ovsdb_column_set *);
void ovsdb_row_hash_destroy(struct ovsdb_row_hash *, bool destroy_rows);
size_t ovsdb_row_hash_count(const struct ovsdb_row_hash *);
bool ovsdb_row_hash_contains(const struct ovsdb_row_hash *,
                             const struct ovsdb_row *);
bool ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *,
                                 const struct ovsdb_row_hash *);
bool ovsdb_row_hash_insert(struct ovsdb_row_hash *, const struct ovsdb_row *);
bool ovsdb_row_hash_contains__(const struct ovsdb_row_hash *,
                               const struct ovsdb_row *, size_t hash);
bool ovsdb_row_hash_insert__(struct ovsdb_row_hash *,
                             const struct ovsdb_row *, size_t hash);

#endif /* ovsdb/row.h */
-												Global replace of Nicira Networks.

Replaced all instances of Nicira Networks(, Inc) to Nicira, Inc.

Feature #10593
Signed-off-by: Raju Subramanian <rsubramanian@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-02 15:21:36 -07:00
+								/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								#ifndef OVSDB_ROW_H
 								#define OVSDB_ROW_H 1
 								#include <stddef.h>
 								#include <stdint.h>
 								#include "column.h"
-												json: Move from lib to include/openvswitch.

To easily allow both in- and out-of-tree building of the Python
wrapper for the OVS JSON parser (e.g. w/ pip), move json.h to
include/openvswitch. This also requires moving lib/{hmap,shash}.h.

Both hmap.h and shash.h were #include-ing "util.h" even though the
headers themselves did not use anything from there, but rather from
include/openvswitch/util.h. Fixing that required including util.h
in several C files mostly due to OVS_NOT_REACHED and things like
xmalloc.

Signed-off-by: Terry Wilson <twilson@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-12 16:37:34 -05:00
+								#include "openvswitch/hmap.h"
-												list: Remove lib/list.h completely.

All code is now in include/openvswitch/list.h.

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:21 -07:00
+								#include "openvswitch/list.h"
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								#include "ovsdb-data.h"
-												ovsdb: Use table indexes if available for ovsdb_query().

Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

To test this, I set up an ovsdb database with a variable number of rows
and timed the average of how long ovsdb-client took to query a single
row. The first two tests involved a linear scan that didn't match any
rows, so there was no overhead associated with sending or encoding
output. The post-patch linear scan was a worst case scenario where the
table did have an appropriate index but the conditions made its usage
impossible. The indexed lookup test was for a matching row, which did
also include overhead associated with a match. The results are included
in the table below.

Rows                   | 100k | 200k | 300k | 400k | 500k
-----------------------+------+------+------+------+-----
Pre-patch linear scan  |  9ms | 24ms | 37ms | 49ms | 61ms
Post-patch linear scan |  9ms | 24ms | 38ms | 49ms | 61ms
Indexed lookup         |  3ms |  3ms |  3ms |  3ms |  3ms

I also tested the performance of ovsdb_query() by wrapping it in a loop
and measuring the time it took to perform 1000 linear scans on 1, 10,
100k, and 200k rows. This test showed that the new index checking code
did not slow down worst case lookups to a statistically detectable
degree.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-06-19 08:54:53 -04:00
+								#include "table.h"
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
 								struct ovsdb_column_set;
-												ovsdb: Add support for weak references.

											
										
										
											2010-03-15 15:41:54 -07:00
+								/* A weak reference.
 								 *
 								 * When a column in row A contains a weak reference to UUID of a row B this
 								 * constitutes a weak reference from A (the source) to B (the destination).
 								 *
 								 * Rows A and B may be in the same table or different tables.
 								 *
 								 * Weak references from a row to itself are allowed, but no "struct
 								 * ovsdb_weak_ref" structures are created for them.
 								 */
 								struct ovsdb_weak_ref {
-												ovsdb: transaction: Incremental reassessment of weak refs.

The main idea is to not store list of weak references in the source
row, so they all don't need to be re-checked/updated on every
modification of that source row.  The point is that source row already
knows UUIDs of all destination rows stored in the data, so there is no
much profit in storing this information somewhere else.  If needed,
destination row can be looked up and reference can be looked up in the
destination row.  For the fast lookup, destination row now stores
references in a hash map.

Weak reference structure now contains the table and uuid of a source
row instead of a direct pointer.  This allows to replace/update the
source row without breaking any weak references stored in destination
rows.

Structure also now contains the key-value pair of atoms that triggered
creation of this reference.  These atoms can be used to quickly
subtract removed references from a source row.  During reassessment,
ovsdb now only needs to care about new added or removed atoms, and
atoms that got removed due to removal of the destination rows, but
these are marked for reassessment by the destination row.

ovsdb_datum_subtract() is used to remove atoms that points to removed
or incorrect rows, so there is no need to re-sort datum in the end.

Results of an OVN load-balancer benchmark that adds 3K load-balancers
to each of 120 logical switches and 120 logical routers in the OVN
sandbox with clustered Northbound database and then removes them:

Before:

  %CPU  CPU Time  CMD
  86.8  00:16:05  ovsdb-server nb1.db
  44.1  00:08:11  ovsdb-server nb2.db
  43.2  00:08:00  ovsdb-server nb3.db

After:

  %CPU  CPU Time  CMD
  54.9  00:02:58  ovsdb-server nb1.db
  33.3  00:01:48  ovsdb-server nb2.db
  32.2  00:01:44  ovsdb-server nb3.db

So, on a cluster leader the processing time dropped by 5.4x, on
followers - by 4.5x.  More load-balancers - larger the performance
difference.  There is a slight increase of memory usage, because new
reference structure is larger, but the difference is not significant.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>

											
										
										
											2021-10-16 03:20:23 +02:00
+								    struct hmap_node dst_node;     /* In ovsdb_row's 'dst_refs' hmap. */
 								    struct ovs_list src_node;      /* In txn_row's 'deleted/added_refs'. */
 								    struct ovsdb_table *src_table; /* Source row table. */
 								    struct uuid src;               /* Source row uuid. */
 								    struct ovsdb_table *dst_table; /* Destination row table. */
-												ovsdb: Weak references performance fix

Prevents the cloning of rows with outgoing or incoming weak references when
those rows aren't being modified.

It improves the OVSDB Server performance when many rows with weak references
are involved in a transaction.

In the original code (dst_refs is created from scratch):

old->dst_refs = all the rows that weak referenced old

new->dst_refs = all the rows that weak referenced old and are still weak
+referencing new + rows in the transaction that weak referenced new

In the patch (dst_refs incrementally built):
Old->dst_refs = all the rows that weak referenced old

Ideally, but expansive to calculate:
New->dst_refs = old->dst_refs - "weak references removed within this TXN" +
+"weak references created within this TXN"

What this patch implements:
New->dst_refs = old->dst_refs - "weak references in old rows in TXN" + "weak
+references in new rows in TXN"

The resulting sets should be equal in both cases.

We do some more optimizations:

- If we know that the transactions must be successful at some point then,
  instead of cloning dst_refs we could just move the elements between
  the lists.

- At that point we lost the rollback feature, but we aren't going to need
  it anyway (note that we didn't really touch the src_refs part).

- The references in dst_refs must point to new instead than old.
  Previously we iterated over all the weak references in dst_refs
  to change that pointer, but using an UUID is easier, and prevents
  that iteration completely.

For some more commentary, see:
http://openvswitch.org/pipermail/dev/2016-July/074840.html

Signed-off-by: Esteban Rodriguez Betancourt <estebarb@hpe.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-13 17:28:51 +00:00
+								    struct uuid dst;               /* Destination row uuid. */
-												ovsdb: transaction: Incremental reassessment of weak refs.

The main idea is to not store list of weak references in the source
row, so they all don't need to be re-checked/updated on every
modification of that source row.  The point is that source row already
knows UUIDs of all destination rows stored in the data, so there is no
much profit in storing this information somewhere else.  If needed,
destination row can be looked up and reference can be looked up in the
destination row.  For the fast lookup, destination row now stores
references in a hash map.

Weak reference structure now contains the table and uuid of a source
row instead of a direct pointer.  This allows to replace/update the
source row without breaking any weak references stored in destination
rows.

Structure also now contains the key-value pair of atoms that triggered
creation of this reference.  These atoms can be used to quickly
subtract removed references from a source row.  During reassessment,
ovsdb now only needs to care about new added or removed atoms, and
atoms that got removed due to removal of the destination rows, but
these are marked for reassessment by the destination row.

ovsdb_datum_subtract() is used to remove atoms that points to removed
or incorrect rows, so there is no need to re-sort datum in the end.

Results of an OVN load-balancer benchmark that adds 3K load-balancers
to each of 120 logical switches and 120 logical routers in the OVN
sandbox with clustered Northbound database and then removes them:

Before:

  %CPU  CPU Time  CMD
  86.8  00:16:05  ovsdb-server nb1.db
  44.1  00:08:11  ovsdb-server nb2.db
  43.2  00:08:00  ovsdb-server nb3.db

After:

  %CPU  CPU Time  CMD
  54.9  00:02:58  ovsdb-server nb1.db
  33.3  00:01:48  ovsdb-server nb2.db
  32.2  00:01:44  ovsdb-server nb3.db

So, on a cluster leader the processing time dropped by 5.4x, on
followers - by 4.5x.  More load-balancers - larger the performance
difference.  There is a slight increase of memory usage, because new
reference structure is larger, but the difference is not significant.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>

											
										
										
											2021-10-16 03:20:23 +02:00
 								    /* Source row's key-value pair that created this reference.
 								     * This information is needed in order to find and delete the reference
 								     * from the source row.  We need both key and value in order to avoid
 								     * accidential deletion of an updated data, i.e. if value in datum got
 								     * updated and the reference was created by the old value.
 								     * Storing column index in order to remove references from the correct
 								     * column.   'by_key' flag allows to distinguish 2 references in a corner
 								     * case where key and value are the same. */
 								    union ovsdb_atom key;
 								    union ovsdb_atom value;
 								    struct ovsdb_type type;        /* Datum type of the key-value pair. */
 								    unsigned int column_idx;       /* Row column index for this pair. */
 								    bool by_key;                   /* 'true' if reference is a 'key'. */
-												ovsdb: Add support for weak references.

											
										
										
											2010-03-15 15:41:54 -07:00
+								};
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								/* A row in a database table. */
 								struct ovsdb_row {
-												ovsdb: Add support for weak references.

											
										
										
											2010-03-15 15:41:54 -07:00
+								    struct hmap_node hmap_node;    /* Element in ovsdb_table's 'rows' hmap. */
-												Rearrange structures to better fit valgrind's memory leak heuristics.

valgrind's memory leak detector considers a pointer to the head of a memory
block to be "definitely" a pointer to that memory block but a pointer to
the interior of a memory block only "possibly" a pointer to that memory
block.  Open vSwitch hmap_node and list data structures can go anywhere
inside a structure; if they are in the middle of a structure then valgrind
considers pointers to them to be possible leaks.  Therefore, this commit
moves some of these from the middle of data structures to the head, to
reduce valgrind's uncertainty.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-03-28 14:13:02 -07:00
+								    struct ovsdb_table *table;     /* Table to which this belongs. */
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								    struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */
-												ovsdb: Add support for referential integrity in the database itself.

											
										
										
											2010-02-08 14:09:41 -08:00
-												osvdb: Add some helpful comments.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2018-11-01 09:29:07 -07:00
+								    /* Weak references.  Updated and checked only at transaction commit. */
-												ovsdb: transaction: Incremental reassessment of weak refs.

The main idea is to not store list of weak references in the source
row, so they all don't need to be re-checked/updated on every
modification of that source row.  The point is that source row already
knows UUIDs of all destination rows stored in the data, so there is no
much profit in storing this information somewhere else.  If needed,
destination row can be looked up and reference can be looked up in the
destination row.  For the fast lookup, destination row now stores
references in a hash map.

Weak reference structure now contains the table and uuid of a source
row instead of a direct pointer.  This allows to replace/update the
source row without breaking any weak references stored in destination
rows.

Structure also now contains the key-value pair of atoms that triggered
creation of this reference.  These atoms can be used to quickly
subtract removed references from a source row.  During reassessment,
ovsdb now only needs to care about new added or removed atoms, and
atoms that got removed due to removal of the destination rows, but
these are marked for reassessment by the destination row.

ovsdb_datum_subtract() is used to remove atoms that points to removed
or incorrect rows, so there is no need to re-sort datum in the end.

Results of an OVN load-balancer benchmark that adds 3K load-balancers
to each of 120 logical switches and 120 logical routers in the OVN
sandbox with clustered Northbound database and then removes them:

Before:

  %CPU  CPU Time  CMD
  86.8  00:16:05  ovsdb-server nb1.db
  44.1  00:08:11  ovsdb-server nb2.db
  43.2  00:08:00  ovsdb-server nb3.db

After:

  %CPU  CPU Time  CMD
  54.9  00:02:58  ovsdb-server nb1.db
  33.3  00:01:48  ovsdb-server nb2.db
  32.2  00:01:44  ovsdb-server nb3.db

So, on a cluster leader the processing time dropped by 5.4x, on
followers - by 4.5x.  More load-balancers - larger the performance
difference.  There is a slight increase of memory usage, because new
reference structure is larger, but the difference is not significant.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>

											
										
										
											2021-10-16 03:20:23 +02:00
+								    struct hmap dst_refs;          /* Weak references to this row. */
-												ovsdb: Add support for weak references.

											
										
										
											2010-03-15 15:41:54 -07:00
 								    /* Number of strong refs to this row from other rows, in this table or
 								     * other tables, through 'uuid' columns that have a 'refTable' constraint
 								     * pointing to this table and a 'refType' of "strong".  A row with nonzero
 								     * 'n_refs' cannot be deleted.  Updated and checked only at transaction
 								     * commit. */
-												ovsdb: Add support for referential integrity in the database itself.

											
										
										
											2010-02-08 14:09:41 -08:00
+								    size_t n_refs;
-												ovsdb: Implement table uniqueness constraints ("indexes").

											
										
										
											2011-06-06 09:09:10 -07:00
+								    /* One datum for each column (shash_count(&table->schema->columns)
 								     * elements). */
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								    struct ovsdb_datum fields[];
-												ovsdb: Implement table uniqueness constraints ("indexes").

											
										
										
											2011-06-06 09:09:10 -07:00
 								    /* Followed by table->schema->n_indexes "struct hmap_node"s.  In rows that
 								     * have have been committed as part of the database, the hmap_node with
 								     * index 'i' is contained in hmap table->indexes[i].  */
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								};
-												ovsdb: transaction: Incremental reassessment of weak refs.

The main idea is to not store list of weak references in the source
row, so they all don't need to be re-checked/updated on every
modification of that source row.  The point is that source row already
knows UUIDs of all destination rows stored in the data, so there is no
much profit in storing this information somewhere else.  If needed,
destination row can be looked up and reference can be looked up in the
destination row.  For the fast lookup, destination row now stores
references in a hash map.

Weak reference structure now contains the table and uuid of a source
row instead of a direct pointer.  This allows to replace/update the
source row without breaking any weak references stored in destination
rows.

Structure also now contains the key-value pair of atoms that triggered
creation of this reference.  These atoms can be used to quickly
subtract removed references from a source row.  During reassessment,
ovsdb now only needs to care about new added or removed atoms, and
atoms that got removed due to removal of the destination rows, but
these are marked for reassessment by the destination row.

ovsdb_datum_subtract() is used to remove atoms that points to removed
or incorrect rows, so there is no need to re-sort datum in the end.

Results of an OVN load-balancer benchmark that adds 3K load-balancers
to each of 120 logical switches and 120 logical routers in the OVN
sandbox with clustered Northbound database and then removes them:

Before:

  %CPU  CPU Time  CMD
  86.8  00:16:05  ovsdb-server nb1.db
  44.1  00:08:11  ovsdb-server nb2.db
  43.2  00:08:00  ovsdb-server nb3.db

After:

  %CPU  CPU Time  CMD
  54.9  00:02:58  ovsdb-server nb1.db
  33.3  00:01:48  ovsdb-server nb2.db
  32.2  00:01:44  ovsdb-server nb3.db

So, on a cluster leader the processing time dropped by 5.4x, on
followers - by 4.5x.  More load-balancers - larger the performance
difference.  There is a slight increase of memory usage, because new
reference structure is larger, but the difference is not significant.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>

											
										
										
											2021-10-16 03:20:23 +02:00
+								uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *);
 								struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *,
 								                                                const struct ovsdb_weak_ref *);
 								void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *);
 								struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *);
-												ovsdb: Prepare snapshot JSON in a separate thread.

Conversion of the database data into JSON object, serialization
and destruction of that object are the most heavy operations
during the database compaction.  If these operations are moved
to a separate thread, the main thread can continue processing
database requests in the meantime.

With this change, the compaction is split in 3 phases:

1. Initialization:
   - Create a copy of the database.
   - Remember current database index.
   - Start a separate thread to convert a copy of the database
     into serialized JSON object.

2. Wait:
   - Continue normal operation until compaction thread is done.
   - Meanwhile, compaction thread:
     * Convert database copy to JSON.
     * Serialize resulted JSON.
     * Destroy original JSON object.

3. Finish:
   - Destroy the database copy.
   - Take the snapshot created by the thread.
   - Write on disk.

The key for this schema to be fast is the ability to create
a shallow copy of the database.  This doesn't take too much
time allowing the thread to do most of work.

Database copy is created and destroyed only by the main thread,
so there is no need for synchronization.

Such solution allows to reduce the time main thread is blocked
by compaction by 80-90%.  For example, in ovn-heater tests
with 120 node density-heavy scenario, where compaction normally
takes 5-6 seconds at the end of a test, measured compaction
times was all below 1 second with the change applied.  Also,
note that these measured times are the sum of phases 1 and 3,
so actual poll intervals are about half a second in this case.

Only implemented for raft storage for now.  The implementation
for standalone databases can be added later by using a file
offset as a database index and copying newly added changes
from the old file to a new one during ovsdb_log_replace().

Reported-at: https://bugzilla.redhat.com/2069108
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-07-01 01:34:07 +02:00
+								struct ovsdb_row *ovsdb_row_datum_clone(const struct ovsdb_row *);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								void ovsdb_row_destroy(struct ovsdb_row *);
 								uint32_t ovsdb_row_hash_columns(const struct ovsdb_row *,
 								                                const struct ovsdb_column_set *,
 								                                uint32_t basis);
 								bool ovsdb_row_equal_columns(const struct ovsdb_row *,
 								                             const struct ovsdb_row *,
 								                             const struct ovsdb_column_set *);
 								int ovsdb_row_compare_columns_3way(const struct ovsdb_row *,
 								                                   const struct ovsdb_row *,
 								                                   const struct ovsdb_column_set *);
-												ovsdb: row: Add support for xor-based row updates.

This will be used to apply update3 type updates to ovsdb tables
while processing updates for future ovsdb 'relay' service model.

'ovsdb_datum_apply_diff' is allowed to fail, so adding support
to return this error.

Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-06-01 23:01:22 +02:00
+								struct ovsdb_error *ovsdb_row_update_columns(struct ovsdb_row *,
 								                                             const struct ovsdb_row *,
 								                                             const struct ovsdb_column_set *,
 								                                             bool xor);
-												ovsdb: Add functions for formatting column sets and data in columns sets.

These will be used for formatting error messages in an upcoming commit.

											
										
										
											2011-06-06 09:02:01 -07:00
+								void ovsdb_row_columns_to_string(const struct ovsdb_row *,
 								                                 const struct ovsdb_column_set *, struct ds *);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								struct ovsdb_error *ovsdb_row_from_json(struct ovsdb_row *,
 								                                        const struct json *,
-												ovsdb: Get rid of "declare" operation.

It's more elegant, and just as easy to implement, if we allow a
"named-uuid" to be a forward reference to a "uuid-name" in a later
"insert" operation.

											
										
										
											2010-02-08 16:03:21 -08:00
+								                                        struct ovsdb_symbol_table *,
-												ovsdb: relay: Fix handling of XOR updates with size constraints.

Relay servers apply updates via ovsdb_table_execute_update().  XOR
updates contain datum diffs, and datum diffs can be larger than the
type constraints.  Currently, relay will fail to parse such update
into ovsdb row triggering a syntax error and a re-connection.

Fix that by relaxing the size constraints for this kind of updates.

Fixes: 026c77c58ddb ("ovsdb: New ovsdb 'relay' service model.")
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-07-25 11:32:19 +02:00
+								                                        struct ovsdb_column_set *included,
 								                                        bool is_diff)
-												lib: Move compiler.h to <openvswitch/compiler.h>

The following macros are renamed to avoid conflicts with other headers:
 * WARN_UNUSED_RESULT to OVS_WARN_UNUSED_RESULT
 * PRINTF_FORMAT to OVS_PRINTF_FORMAT
 * NO_RETURN to OVS_NO_RETURN

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-15 14:10:38 +01:00
+								    OVS_WARN_UNUSED_RESULT;
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								struct json *ovsdb_row_to_json(const struct ovsdb_row *,
 								                               const struct ovsdb_column_set *include);
-												ovsdb-server: Log database transactions for user requested tables.

Add a new command, 'ovsdb-server/tlog-set DB:TABLE on|off', which
allows the user to enable/disable transaction logging for specific
databases and tables.

By default, logging is disabled.  Once enabled, logs are generated
with level INFO and are also rate limited.

If used with care, this command can be useful in analyzing production
deployment performance issues, allowing the user to pin point
bottlenecks without the need to enable wider debug logs, e.g., jsonrpc.

A command to inspect the logging state is also added:
'ovsdb-server/tlog-list'.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-06-24 11:55:58 +02:00
+								void ovsdb_row_to_string(const struct ovsdb_row *, struct ds *);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
 								static inline const struct uuid *
 								ovsdb_row_get_uuid(const struct ovsdb_row *row)
 								{
 								    return &row->fields[OVSDB_COL_UUID].keys[0].uuid;
 								}
 								static inline struct uuid *
 								ovsdb_row_get_uuid_rw(struct ovsdb_row *row)
 								{
-												ovsdb: Fix incorrect sharing of UUID and _version columns.

Datum of UUID and _version columns is accessed directly via
ovsdb_row_get_uuid_rw() and ovsdb_row_get_version_rw() functions
instead of ovsdb_data_* functions.  Meaning, the data will be
directly modified even if it is shared between rows.

Fix that by unsharing the data whenever RW pointer is taken.

The issue was mostly hidden because weak reference assessment
code always called ovsdb_datum_subtract() even if not needed.
This way all the new transaction rows were always implicitly
unshared.

Also making ovsdb_datum_subtract() call conditional, so the
issue can be hit by existing unit tests.

Fixes: 485ac63d10f8 ("ovsdb: Add lazy-copy support for ovsdb_datum objects.")
Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-12-18 03:02:40 +01:00
+								    ovsdb_datum_unshare(&row->fields[OVSDB_COL_UUID], &ovsdb_type_uuid);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								    return &row->fields[OVSDB_COL_UUID].keys[0].uuid;
 								}
 								static inline const struct uuid *
 								ovsdb_row_get_version(const struct ovsdb_row *row)
 								{
 								    return &row->fields[OVSDB_COL_VERSION].keys[0].uuid;
 								}
 								static inline struct uuid *
 								ovsdb_row_get_version_rw(struct ovsdb_row *row)
 								{
-												ovsdb: Fix incorrect sharing of UUID and _version columns.

Datum of UUID and _version columns is accessed directly via
ovsdb_row_get_uuid_rw() and ovsdb_row_get_version_rw() functions
instead of ovsdb_data_* functions.  Meaning, the data will be
directly modified even if it is shared between rows.

Fix that by unsharing the data whenever RW pointer is taken.

The issue was mostly hidden because weak reference assessment
code always called ovsdb_datum_subtract() even if not needed.
This way all the new transaction rows were always implicitly
unshared.

Also making ovsdb_datum_subtract() call conditional, so the
issue can be hit by existing unit tests.

Fixes: 485ac63d10f8 ("ovsdb: Add lazy-copy support for ovsdb_datum objects.")
Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-12-18 03:02:40 +01:00
+								    ovsdb_datum_unshare(&row->fields[OVSDB_COL_VERSION], &ovsdb_type_uuid);
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
+								    return &row->fields[OVSDB_COL_VERSION].keys[0].uuid;
 								}
 								static inline uint32_t
 								ovsdb_row_hash(const struct ovsdb_row *row)
 								{
 								    return uuid_hash(ovsdb_row_get_uuid(row));
 								}
-												ovsdb: Use table indexes if available for ovsdb_query().

Currently all OVSDB database queries except for UUID lookups all result
in linear lookups over the entire table, even if an index is present.

This patch modifies ovsdb_query() to attempt an index lookup first, if
possible. If no matching indexes are present then a linear index is
still conducted.

To test this, I set up an ovsdb database with a variable number of rows
and timed the average of how long ovsdb-client took to query a single
row. The first two tests involved a linear scan that didn't match any
rows, so there was no overhead associated with sending or encoding
output. The post-patch linear scan was a worst case scenario where the
table did have an appropriate index but the conditions made its usage
impossible. The indexed lookup test was for a matching row, which did
also include overhead associated with a match. The results are included
in the table below.

Rows                   | 100k | 200k | 300k | 400k | 500k
-----------------------+------+------+------+------+-----
Pre-patch linear scan  |  9ms | 24ms | 37ms | 49ms | 61ms
Post-patch linear scan |  9ms | 24ms | 38ms | 49ms | 61ms
Indexed lookup         |  3ms |  3ms |  3ms |  3ms |  3ms

I also tested the performance of ovsdb_query() by wrapping it in a loop
and measuring the time it took to perform 1000 linear scans on 1, 10,
100k, and 200k rows. This test showed that the new index checking code
did not slow down worst case lookups to a statistically detectable
degree.

Reported-at: https://issues.redhat.com/browse/FDP-590
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-06-19 08:54:53 -04:00
 								/* Returns the offset in bytes from the start of an ovsdb_row for 'table' to
 								 * the hmap_node for the index numbered 'i'. */
 								static inline size_t
 								ovsdb_row_index_offset__(const struct ovsdb_table *table, size_t i)
 								{
 								    size_t n_fields = shash_count(&table->schema->columns);
 								    return (offsetof(struct ovsdb_row, fields)
 								            + n_fields * sizeof(struct ovsdb_datum)
 								            + i * sizeof(struct hmap_node));
 								}
 								/* Returns the hmap_node in 'row' for the index numbered 'i'. */
 								static inline struct hmap_node *
 								ovsdb_row_get_index_node(struct ovsdb_row *row, size_t i)
 								{
 								    return (void *) ((char *) row + ovsdb_row_index_offset__(row->table, i));
 								}
 								/* Returns the ovsdb_row given 'index_node', which is a pointer to that row's
 								 * hmap_node for the index numbered 'i' within 'table'. */
 								static inline struct ovsdb_row *
 								ovsdb_row_from_index_node(struct hmap_node *index_node,
 								                          const struct ovsdb_table *table, size_t i)
 								{
 								    return (void *) ((char *) index_node - ovsdb_row_index_offset__(table, i));
 								}
-												Initial implementation of OVSDB.

											
										
										
											2009-11-04 15:11:44 -08:00
 								/* An unordered collection of rows. */
 								struct ovsdb_row_set {
 								    const struct ovsdb_row **rows;
 								    size_t n_rows, allocated_rows;
 								};
 								#define OVSDB_ROW_SET_INITIALIZER { NULL, 0, 0 }
 								void ovsdb_row_set_init(struct ovsdb_row_set *);
 								void ovsdb_row_set_destroy(struct ovsdb_row_set *);
 								void ovsdb_row_set_add_row(struct ovsdb_row_set *, const struct ovsdb_row *);
 								struct json *ovsdb_row_set_to_json(const struct ovsdb_row_set *,
 								                                   const struct ovsdb_column_set *);
 								void ovsdb_row_set_sort(struct ovsdb_row_set *,
 								                        const struct ovsdb_column_set *);
 								/* A hash table of rows.  A specified set of columns is used for hashing and
 								 * comparing rows.
 								 *
 								 * The row hash doesn't necessarily own its rows.  They may be owned by, for
 								 * example, an ovsdb_table. */
 								struct ovsdb_row_hash {
 								    struct hmap rows;
 								    struct ovsdb_column_set columns;
 								};
 								#define OVSDB_ROW_HASH_INITIALIZER(RH) \
 								    { HMAP_INITIALIZER(&(RH).rows), OVSDB_COLUMN_SET_INITIALIZER }
 								struct ovsdb_row_hash_node {
 								    struct hmap_node hmap_node;
 								    const struct ovsdb_row *row;
 								};
 								void ovsdb_row_hash_init(struct ovsdb_row_hash *,
 								                         const struct ovsdb_column_set *);
 								void ovsdb_row_hash_destroy(struct ovsdb_row_hash *, bool destroy_rows);
 								size_t ovsdb_row_hash_count(const struct ovsdb_row_hash *);
 								bool ovsdb_row_hash_contains(const struct ovsdb_row_hash *,
 								                             const struct ovsdb_row *);
 								bool ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *,
 								                                 const struct ovsdb_row_hash *);
 								bool ovsdb_row_hash_insert(struct ovsdb_row_hash *, const struct ovsdb_row *);
 								bool ovsdb_row_hash_contains__(const struct ovsdb_row_hash *,
 								                               const struct ovsdb_row *, size_t hash);
 								bool ovsdb_row_hash_insert__(struct ovsdb_row_hash *,
 								                             const struct ovsdb_row *, size_t hash);
 								#endif /* ovsdb/row.h */