2012-09-07 10:07:03 -07:00
|
|
|
/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
|
2009-11-04 15:11:44 -08:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include "trigger.h"
|
|
|
|
|
|
|
|
#include <limits.h>
|
2017-12-31 21:15:58 -08:00
|
|
|
#include <string.h>
|
2009-11-04 15:11:44 -08:00
|
|
|
|
2024-01-16 22:52:05 +00:00
|
|
|
#include "cooperative-multitasking.h"
|
2017-12-28 13:21:11 -08:00
|
|
|
#include "file.h"
|
2016-07-12 16:37:34 -05:00
|
|
|
#include "openvswitch/json.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
#include "jsonrpc.h"
|
|
|
|
#include "ovsdb.h"
|
2017-12-28 13:21:11 -08:00
|
|
|
#include "ovsdb-error.h"
|
2017-11-03 13:53:53 +08:00
|
|
|
#include "openvswitch/poll-loop.h"
|
2011-07-26 10:17:36 -07:00
|
|
|
#include "server.h"
|
2017-12-31 21:15:58 -08:00
|
|
|
#include "transaction.h"
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case, ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
#include "transaction-forward.h"
|
2017-12-31 21:15:58 -08:00
|
|
|
#include "openvswitch/vlog.h"
|
2017-11-03 13:53:53 +08:00
|
|
|
#include "util.h"
|
2023-03-27 21:43:00 +02:00
|
|
|
#include "uuid.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
VLOG_DEFINE_THIS_MODULE(trigger);
|
2017-12-28 13:21:11 -08:00
|
|
|
|
2011-07-26 10:17:36 -07:00
|
|
|
/* Prototypes for file-local helpers that are used before their
 * definitions appear. */
static bool ovsdb_trigger_try(struct ovsdb_trigger *, long long int now);
static void ovsdb_trigger_complete(struct ovsdb_trigger *);
static void trigger_convert_error(struct ovsdb_trigger *,
                                  struct ovsdb_error *);
static void trigger_success(struct ovsdb_trigger *, struct json *result);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
bool
|
2012-09-07 10:07:03 -07:00
|
|
|
ovsdb_trigger_init(struct ovsdb_session *session, struct ovsdb *db,
|
2011-07-26 10:17:36 -07:00
|
|
|
struct ovsdb_trigger *trigger,
|
2017-12-28 13:21:11 -08:00
|
|
|
struct jsonrpc_msg *request, long long int now,
|
|
|
|
bool read_only, const char *role, const char *id)
|
2009-11-04 15:11:44 -08:00
|
|
|
{
|
2017-12-28 13:21:11 -08:00
|
|
|
ovs_assert(!strcmp(request->method, "transact") ||
|
|
|
|
!strcmp(request->method, "convert"));
|
2011-07-26 10:17:36 -07:00
|
|
|
trigger->session = session;
|
2012-09-07 10:07:03 -07:00
|
|
|
trigger->db = db;
|
2016-03-25 14:10:22 -07:00
|
|
|
ovs_list_push_back(&trigger->db->triggers, &trigger->node);
|
2009-11-04 15:11:44 -08:00
|
|
|
trigger->request = request;
|
2023-03-27 21:43:00 +02:00
|
|
|
trigger->converted_db = NULL;
|
2017-12-28 13:21:11 -08:00
|
|
|
trigger->reply = NULL;
|
2017-12-31 21:15:58 -08:00
|
|
|
trigger->progress = NULL;
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
need to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that includes database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
trigger->txn_forward = NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
trigger->created = now;
|
|
|
|
trigger->timeout_msec = LLONG_MAX;
|
2016-07-29 14:39:29 -07:00
|
|
|
trigger->read_only = read_only;
|
2017-05-31 19:04:32 -04:00
|
|
|
trigger->role = nullable_xstrdup(role);
|
|
|
|
trigger->id = nullable_xstrdup(id);
|
2017-12-28 13:21:11 -08:00
|
|
|
return ovsdb_trigger_try(trigger, now);
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ovsdb_trigger_destroy(struct ovsdb_trigger *trigger)
|
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
ovsdb_txn_progress_destroy(trigger->progress);
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
need to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that includes database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
ovsdb_txn_forward_destroy(trigger->db, trigger->txn_forward);
|
2016-03-25 14:10:22 -07:00
|
|
|
ovs_list_remove(&trigger->node);
|
2023-03-27 21:43:00 +02:00
|
|
|
ovsdb_destroy(trigger->converted_db);
|
2017-12-28 13:21:11 -08:00
|
|
|
jsonrpc_msg_destroy(trigger->request);
|
|
|
|
jsonrpc_msg_destroy(trigger->reply);
|
2017-05-31 19:04:32 -04:00
|
|
|
free(trigger->role);
|
|
|
|
free(trigger->id);
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
ovsdb_trigger_is_complete(const struct ovsdb_trigger *trigger)
|
|
|
|
{
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
need to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that includes database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
return trigger->reply && !trigger->progress && !trigger->txn_forward;
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
struct jsonrpc_msg *
|
|
|
|
ovsdb_trigger_steal_reply(struct ovsdb_trigger *trigger)
|
2009-11-04 15:11:44 -08:00
|
|
|
{
|
2017-12-28 13:21:11 -08:00
|
|
|
struct jsonrpc_msg *reply = trigger->reply;
|
|
|
|
trigger->reply = NULL;
|
|
|
|
return reply;
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
/* Cancels 'trigger'. 'reason' should be a human-readable reason for log
|
|
|
|
* messages etc. */
|
2009-11-04 15:11:44 -08:00
|
|
|
void
|
2017-12-31 21:15:58 -08:00
|
|
|
ovsdb_trigger_cancel(struct ovsdb_trigger *trigger, const char *reason)
|
2017-12-28 13:21:11 -08:00
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
if (trigger->progress) {
|
|
|
|
/* The transaction still might complete asynchronously, but we can stop
|
|
|
|
* tracking it. */
|
|
|
|
ovsdb_txn_progress_destroy(trigger->progress);
|
|
|
|
trigger->progress = NULL;
|
|
|
|
}
|
|
|
|
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
need to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that includes database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
if (trigger->txn_forward) {
|
|
|
|
ovsdb_txn_forward_destroy(trigger->db, trigger->txn_forward);
|
|
|
|
trigger->txn_forward = NULL;
|
|
|
|
}
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
jsonrpc_msg_destroy(trigger->reply);
|
|
|
|
trigger->reply = NULL;
|
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
if (!strcmp(trigger->request->method, "transact")) {
|
2017-12-31 21:15:58 -08:00
|
|
|
/* There's no place to stick 'reason' into the error reply because RFC
|
|
|
|
* 7047 prescribes a fix form for these messages, see section 4.1.4. */
|
|
|
|
trigger->reply = jsonrpc_create_error(json_string_create("canceled"),
|
|
|
|
trigger->request->id);
|
|
|
|
ovsdb_trigger_complete(trigger);
|
2017-12-28 13:21:11 -08:00
|
|
|
} else if (!strcmp(trigger->request->method, "convert")) {
|
2017-12-31 21:15:58 -08:00
|
|
|
trigger_convert_error(
|
|
|
|
trigger,
|
|
|
|
ovsdb_error("canceled", "database conversion canceled because %s",
|
|
|
|
reason));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ovsdb_trigger_prereplace_db(struct ovsdb_trigger *trigger)
|
|
|
|
{
|
|
|
|
if (!ovsdb_trigger_is_complete(trigger)) {
|
|
|
|
if (!strcmp(trigger->request->method, "transact")) {
|
|
|
|
ovsdb_trigger_cancel(trigger, "database schema is changing");
|
|
|
|
} else if (!strcmp(trigger->request->method, "convert")) {
|
|
|
|
/* We don't cancel "convert" requests when a database is being
|
|
|
|
* replaced for two reasons. First, we expect the administrator to
|
|
|
|
* do some kind of sensible synchronization on conversion requests,
|
|
|
|
* that is, it only really makes sense for the admin to do a single
|
|
|
|
* conversion at a time at a scheduled point. Second, if we did
|
|
|
|
* then every "convert" request would end up getting canceled since
|
|
|
|
* "convert" itself causes the database to be replaced. */
|
|
|
|
} else {
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
}
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-27 21:43:00 +02:00
|
|
|
/* Find among incomplete triggers one that caused database conversion
|
|
|
|
* with specified transaction ID. */
|
|
|
|
struct ovsdb *
|
|
|
|
ovsdb_trigger_find_and_steal_converted_db(const struct ovsdb *db,
|
|
|
|
const struct uuid *txnid)
|
|
|
|
{
|
|
|
|
struct ovsdb *converted_db = NULL;
|
|
|
|
struct ovsdb_trigger *t;
|
|
|
|
|
|
|
|
if (uuid_is_zero(txnid)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
LIST_FOR_EACH_SAFE (t, node, &db->triggers) {
|
|
|
|
if (t->db == db && t->converted_db
|
|
|
|
&& uuid_equals(&t->conversion_txnid, txnid)) {
|
|
|
|
converted_db = t->converted_db;
|
|
|
|
t->converted_db = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return converted_db;
|
|
|
|
}
|
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
bool
|
2009-11-04 15:11:44 -08:00
|
|
|
ovsdb_trigger_run(struct ovsdb *db, long long int now)
|
|
|
|
{
|
2022-03-23 12:56:14 +01:00
|
|
|
struct ovsdb_trigger *t;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
bool run_triggers = db->run_triggers;
|
raft: Avoid busy loop during leader election.
When a server doesn't see a leader yet, e.g. during leader re-election,
if a transaction comes from a client, it will cause 100% CPU busy loop.
With debug log enabled it is like:
2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
...
The problem is that in ovsdb_trigger_try(), all cluster errors are treated
as temporary error and retry immediately. This patch fixes it by introducing
'run_triggers_now', which tells if a retry is needed immediately. When the
cluster error is with detail 'not leader', we don't immediately retry, but
will wait for the next poll event to trigger the retry. When 'not leader'
status changes, there must be a event, i.e. raft RPC that changes the
status, so the trigger is guaranteed to be triggered, without busy loop.
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2020-02-28 18:07:07 -08:00
|
|
|
db->run_triggers_now = db->run_triggers = false;
|
2017-12-28 13:21:11 -08:00
|
|
|
|
|
|
|
bool disconnect_all = false;
|
|
|
|
|
2022-03-23 12:56:14 +01:00
|
|
|
LIST_FOR_EACH_SAFE (t, node, &db->triggers) {
|
2024-01-16 22:52:05 +00:00
|
|
|
cooperative_multitasking_yield();
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
if (run_triggers
|
|
|
|
|| now - t->created >= t->timeout_msec
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
need to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that includes database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
|| t->progress || t->txn_forward) {
|
2017-12-28 13:21:11 -08:00
|
|
|
if (ovsdb_trigger_try(t, now)) {
|
|
|
|
disconnect_all = true;
|
|
|
|
}
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
}
|
2017-12-28 13:21:11 -08:00
|
|
|
return disconnect_all;
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ovsdb_trigger_wait(struct ovsdb *db, long long int now)
|
|
|
|
{
|
raft: Avoid busy loop during leader election.
When a server doesn't see a leader yet, e.g. during leader re-election,
if a transaction comes from a client, it will cause 100% CPU busy loop.
With debug log enabled it is like:
2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
...
The problem is that in ovsdb_trigger_try(), all cluster errors are treated
as temporary error and retry immediately. This patch fixes it by introducing
'run_triggers_now', which tells if a retry is needed immediately. When the
cluster error is with detail 'not leader', we don't immediately retry, but
will wait for the next poll event to trigger the retry. When 'not leader'
status changes, there must be a event, i.e. raft RPC that changes the
status, so the trigger is guaranteed to be triggered, without busy loop.
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2020-02-28 18:07:07 -08:00
|
|
|
if (db->run_triggers_now) {
|
2009-11-04 15:11:44 -08:00
|
|
|
poll_immediate_wake();
|
|
|
|
} else {
|
|
|
|
long long int deadline = LLONG_MAX;
|
|
|
|
struct ovsdb_trigger *t;
|
|
|
|
|
2010-09-17 10:33:10 -07:00
|
|
|
LIST_FOR_EACH (t, node, &db->triggers) {
|
2009-11-04 15:11:44 -08:00
|
|
|
if (t->created < LLONG_MAX - t->timeout_msec) {
|
|
|
|
long long int t_deadline = t->created + t->timeout_msec;
|
|
|
|
if (deadline > t_deadline) {
|
|
|
|
deadline = t_deadline;
|
|
|
|
if (now >= deadline) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (deadline < LLONG_MAX) {
|
2010-05-12 12:53:07 -07:00
|
|
|
poll_timer_wait_until(deadline);
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
2011-07-26 10:17:36 -07:00
|
|
|
ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now)
|
2009-11-04 15:11:44 -08:00
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
/* Handle "initialized" state. */
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case, ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
if (!t->reply && !t->txn_forward) {
|
2017-12-31 21:15:58 -08:00
|
|
|
ovs_assert(!t->progress);
|
|
|
|
|
|
|
|
struct ovsdb_txn *txn = NULL;
|
|
|
|
if (!strcmp(t->request->method, "transact")) {
|
ovsdb raft: Precheck prereq before proposing commit.
In current OVSDB Raft design, when there are multiple transactions
pending, either from same server node or different nodes in the
cluster, only the first one can be successful at once, and following
ones will fail at the prerequisite check on leader node, because
the first one will update the expected prerequisite eid on leader
node, and the prerequisite used for proposing a commit has to be
committed eid, so it is not possible for a node to use the latest
prerequisite expected by the leader to propose a commit until the
latest transaction is committed by the leader and updated the
committed_index on the node.
Current implementation proposes the commit as soon as the transaction
is requested by the client, which results in continuous retries, which
causes high CPU load and waste.
Particularly, even if all clients are using leader_only to connect to
only the leader, the prereq check failure still happens a lot when
a batch of transactions are pending on the leader node - the leader
node proposes a batch of commits using the same committed eid as
prerequisite and it updates the expected prereq as soon as the first
one is in progress, but it needs time to append to followers and wait
until majority replies to update the committed_index, which results in
continuously useless retries of the following transactions proposed by
the leader itself.
This patch doesn't change the design but simply pre-checks if current
eid is same as prereq, before proposing the commit, to avoid waste of
CPU cycles, for both leader and followers. When clients use leader_only
mode, this patch completely eliminates the prereq check failures.
In scale test of OVN with 1k HVs and creating and binding 10k lports,
the patch resulted in 90% CPU cost reduction on leader and >80% CPU cost
reduction on followers. (The test was with leader election base time
set to 10000ms, because otherwise the test couldn't complete because
of the frequent leader re-election.)
This is just one of the related performance problems of the prereq
checking mechanism discussed at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2019-February/048243.html
Signed-off-by: Han Zhou <hzhou8@ebay.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-03-01 10:56:37 -08:00
|
|
|
if (!ovsdb_txn_precheck_prereq(t->db)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case, ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
bool durable, forwarding_needed;
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
struct json *result;
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
/* Trying to compose transaction. */
|
2017-12-31 21:15:58 -08:00
|
|
|
txn = ovsdb_execute_compose(
|
|
|
|
t->db, t->session, t->request->params, t->read_only,
|
|
|
|
t->role, t->id, now - t->created, &t->timeout_msec,
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
&durable, &forwarding_needed, &result);
|
2017-12-31 21:15:58 -08:00
|
|
|
if (!txn) {
|
|
|
|
if (result) {
|
|
|
|
/* Complete. There was an error but we still represent it
|
|
|
|
* in JSON-RPC as a successful result. */
|
|
|
|
trigger_success(t, result);
|
|
|
|
} else {
|
|
|
|
/* Unsatisfied "wait" condition. Take no action now, retry
|
|
|
|
* later. */
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
if (forwarding_needed) {
|
|
|
|
/* Transaction is good, but we don't need it. */
|
|
|
|
ovsdb_txn_abort(txn);
|
|
|
|
json_destroy(result);
|
|
|
|
/* Transition to "forwarding" state. */
|
|
|
|
t->txn_forward = ovsdb_txn_forward_create(t->db, t->request);
|
|
|
|
/* Forward will not be completed immediately. Will check
|
|
|
|
* next time. */
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
/* Transition to "committing" state. */
|
|
|
|
t->reply = jsonrpc_create_reply(result, t->request->id);
|
|
|
|
t->progress = ovsdb_txn_propose_commit(txn, durable);
|
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
} else if (!strcmp(t->request->method, "convert")) {
|
|
|
|
/* Permission check. */
|
|
|
|
if (t->role && *t->role) {
|
|
|
|
trigger_convert_error(
|
|
|
|
t, ovsdb_perm_error(
|
|
|
|
"RBAC rules for client \"%s\" role \"%s\" prohibit "
|
|
|
|
"\"convert\" of database %s "
|
|
|
|
"(only the root role may convert databases)",
|
|
|
|
t->id, t->role, t->db->schema->name));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-12-14 02:04:07 +01:00
|
|
|
if (t->read_only) {
|
|
|
|
trigger_convert_error(
|
|
|
|
t, ovsdb_error("not allowed", "conversion is not allowed "
|
|
|
|
"for read-only database %s",
|
|
|
|
t->db->schema->name));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
/* Validate parameters. */
|
|
|
|
const struct json *params = t->request->params;
|
2025-06-24 21:54:33 +02:00
|
|
|
if (params->type != JSON_ARRAY || json_array_size(params) != 2) {
|
2017-12-31 21:15:58 -08:00
|
|
|
trigger_convert_error(t, ovsdb_syntax_error(params, NULL,
|
|
|
|
"array expected"));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Parse new schema and make a converted copy. */
|
2025-06-24 21:54:33 +02:00
|
|
|
const struct json *new_schema_json = json_array_at(params, 1);
|
2017-12-31 21:15:58 -08:00
|
|
|
struct ovsdb_schema *new_schema;
|
|
|
|
struct ovsdb_error *error
|
|
|
|
= ovsdb_schema_from_json(new_schema_json, &new_schema);
|
|
|
|
if (!error && strcmp(new_schema->name, t->db->schema->name)) {
|
|
|
|
error = ovsdb_error("invalid parameters",
|
|
|
|
"new schema name (%s) does not match "
|
|
|
|
"database name (%s)",
|
|
|
|
new_schema->name, t->db->schema->name);
|
|
|
|
}
|
|
|
|
if (!error) {
|
2023-03-27 21:43:00 +02:00
|
|
|
ovsdb_destroy(t->converted_db);
|
|
|
|
error = ovsdb_convert(t->db, new_schema, &t->converted_db);
|
2017-12-31 21:15:58 -08:00
|
|
|
}
|
|
|
|
if (error) {
|
2023-03-27 21:42:57 +02:00
|
|
|
ovsdb_schema_destroy(new_schema);
|
2017-12-31 21:15:58 -08:00
|
|
|
trigger_convert_error(t, error);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-03-27 21:42:59 +02:00
|
|
|
struct json *txn_json;
|
|
|
|
if (ovsdb_conversion_with_no_data_supported(t->db)) {
|
|
|
|
txn_json = json_null_create();
|
|
|
|
} else {
|
|
|
|
/* Make the new copy into a transaction log record. */
|
|
|
|
txn_json = ovsdb_to_txn_json(
|
2023-03-27 21:43:00 +02:00
|
|
|
t->converted_db, "converted by ovsdb-server", true);
|
2023-03-27 21:42:59 +02:00
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
/* Propose the change. */
|
|
|
|
t->progress = ovsdb_txn_propose_schema_change(
|
2023-03-27 21:43:00 +02:00
|
|
|
t->db, new_schema, txn_json, &t->conversion_txnid);
|
2023-03-27 21:42:57 +02:00
|
|
|
ovsdb_schema_destroy(new_schema);
|
2017-12-31 21:15:58 -08:00
|
|
|
json_destroy(txn_json);
|
|
|
|
t->reply = jsonrpc_create_reply(json_object_create(),
|
|
|
|
t->request->id);
|
|
|
|
} else {
|
|
|
|
OVS_NOT_REACHED();
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
/* If the transaction committed synchronously, complete it and
|
|
|
|
* transition to "complete". This is more than an optimization because
|
|
|
|
* the file-based storage isn't implemented to read back the
|
|
|
|
* transactions that we write (which is an ugly broken abstraction but
|
|
|
|
* it's what we have). */
|
|
|
|
if (ovsdb_txn_progress_is_complete(t->progress)
|
|
|
|
&& !ovsdb_txn_progress_get_error(t->progress)) {
|
|
|
|
if (txn) {
|
|
|
|
ovsdb_txn_complete(txn);
|
|
|
|
}
|
|
|
|
ovsdb_txn_progress_destroy(t->progress);
|
|
|
|
t->progress = NULL;
|
|
|
|
ovsdb_trigger_complete(t);
|
2023-03-27 21:43:00 +02:00
|
|
|
if (t->converted_db) {
|
|
|
|
ovsdb_replace(t->db, t->converted_db);
|
|
|
|
t->converted_db = NULL;
|
2017-12-31 21:15:58 -08:00
|
|
|
return true;
|
|
|
|
}
|
2017-12-28 13:21:11 -08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
/* Fall through to the general handling for the "committing" state. We
|
|
|
|
* abort the transaction--if and when it eventually commits, we'll read
|
|
|
|
* it back from storage and replay it locally. */
|
|
|
|
if (txn) {
|
|
|
|
ovsdb_txn_abort(txn);
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle "committing" state. */
|
|
|
|
if (t->progress) {
|
|
|
|
if (!ovsdb_txn_progress_is_complete(t->progress)) {
|
|
|
|
return false;
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
/* Transition to "complete". */
|
|
|
|
struct ovsdb_error *error
|
|
|
|
= ovsdb_error_clone(ovsdb_txn_progress_get_error(t->progress));
|
|
|
|
ovsdb_txn_progress_destroy(t->progress);
|
|
|
|
t->progress = NULL;
|
|
|
|
|
2017-12-28 13:21:11 -08:00
|
|
|
if (error) {
|
2017-12-31 21:15:58 -08:00
|
|
|
if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) {
|
|
|
|
/* Temporary error. Transition back to "initialized" state to
|
|
|
|
* try again. */
|
raft: Avoid busy loop during leader election.
When a server doesn't see a leader yet, e.g. during leader re-election,
if a transaction comes from a client, it will cause 100% CPU busy loop.
With debug log enabled it is like:
2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
...
The problem is that in ovsdb_trigger_try(), all cluster errors are treated
as temporary error and retry immediately. This patch fixes it by introducing
'run_triggers_now', which tells if a retry is needed immediately. When the
cluster error is with detail 'not leader', we don't immediately retry, but
will wait for the next poll event to trigger the retry. When 'not leader'
status changes, there must be an event, i.e. raft RPC that changes the
status, so the trigger is guaranteed to be triggered, without busy loop.
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2020-02-28 18:07:07 -08:00
|
|
|
char *err_s = ovsdb_error_to_string(error);
|
|
|
|
VLOG_DBG("cluster error %s", err_s);
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
jsonrpc_msg_destroy(t->reply);
|
|
|
|
t->reply = NULL;
|
|
|
|
t->db->run_triggers = true;
|
raft: Avoid busy loop during leader election.
When a server doesn't see a leader yet, e.g. during leader re-election,
if a transaction comes from a client, it will cause 100% CPU busy loop.
With debug log enabled it is like:
2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
...
The problem is that in ovsdb_trigger_try(), all cluster errors are treated
as temporary error and retry immediately. This patch fixes it by introducing
'run_triggers_now', which tells if a retry is needed immediately. When the
cluster error is with detail 'not leader', we don't immediately retry, but
will wait for the next poll event to trigger the retry. When 'not leader'
status changes, there must be an event, i.e. raft RPC that changes the
status, so the trigger is guaranteed to be triggered, without busy loop.
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2020-02-28 18:07:07 -08:00
|
|
|
if (!strstr(err_s, "not leader")) {
|
|
|
|
t->db->run_triggers_now = true;
|
|
|
|
}
|
|
|
|
free(err_s);
|
2017-12-31 21:15:58 -08:00
|
|
|
ovsdb_error_destroy(error);
|
|
|
|
} else {
|
|
|
|
/* Permanent error. Transition to "completed" state to report
|
|
|
|
* it. */
|
|
|
|
if (!strcmp(t->request->method, "transact")) {
|
|
|
|
json_array_add(t->reply->result,
|
|
|
|
ovsdb_error_to_json_free(error));
|
|
|
|
ovsdb_trigger_complete(t);
|
|
|
|
} else if (!strcmp(t->request->method, "convert")) {
|
|
|
|
jsonrpc_msg_destroy(t->reply);
|
|
|
|
t->reply = NULL;
|
|
|
|
trigger_convert_error(t, error);
|
2025-06-05 16:51:30 +02:00
|
|
|
} else {
|
|
|
|
OVS_NOT_REACHED();
|
2017-12-31 21:15:58 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Success. */
|
|
|
|
ovsdb_trigger_complete(t);
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
|
|
|
|
ovsdb: relay: Add support for transaction forwarding.
Current version of ovsdb relay allows to scale out read-only
access to the primary database. However, many clients are not
read-only but read-mostly. For example, ovn-controller.
In order to scale out database access for this case ovsdb-server
needs to process transactions that are not read-only. Relay is not
allowed to do that, i.e. not allowed to modify the database, but it
can act like a proxy and forward transactions that include database
modifications to the primary server and forward replies back to a
client. At the same time it may serve read-only transactions and
monitor requests by itself greatly reducing the load on primary
server.
This configuration will slightly increase transaction latency, but
it's not very important for read-mostly use cases.
Implementation details:
With this change instead of creating a trigger to commit the
transaction, ovsdb-server will create a trigger for transaction
forwarding. Later, ovsdb_relay_run() will send all new transactions
to the relay source. Once transaction reply received from the
relay source, ovsdb-relay module will update the state of the
transaction forwarding with the reply. After that, trigger_run()
will complete the trigger and jsonrpc_server_run() will send the
reply back to the client. Since transaction reply from the relay
source will be received after all the updates, client will receive
all the updates before receiving the transaction reply as it is in
a normal scenario with other database models.
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-04-15 19:05:40 +02:00
|
|
|
return false;
|
|
|
|
} else if (t->txn_forward) {
|
|
|
|
/* Handle "forwarding" state. */
|
|
|
|
if (!ovsdb_txn_forward_is_complete(t->txn_forward)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Transition to "complete". */
|
|
|
|
ovs_assert(!t->reply);
|
|
|
|
t->reply = ovsdb_txn_forward_steal_reply(t->txn_forward);
|
|
|
|
ovsdb_txn_forward_destroy(t->db, t->txn_forward);
|
|
|
|
t->txn_forward = NULL;
|
|
|
|
ovsdb_trigger_complete(t);
|
2017-12-31 21:15:58 -08:00
|
|
|
return false;
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
OVS_NOT_REACHED();
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-12-31 21:15:58 -08:00
|
|
|
ovsdb_trigger_complete(struct ovsdb_trigger *t)
|
2009-11-04 15:11:44 -08:00
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
ovs_assert(t->reply);
|
2016-03-25 14:10:22 -07:00
|
|
|
ovs_list_remove(&t->node);
|
|
|
|
ovs_list_push_back(&t->session->completions, &t->node);
|
2009-11-04 15:11:44 -08:00
|
|
|
}
|
2017-12-28 13:21:11 -08:00
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
/* Makes a "convert" request into an error.
|
|
|
|
*
|
|
|
|
* This is not suitable for "transact" requests because their replies should
|
|
|
|
* never be bare ovsdb_errors: RFC 7047 says that their replies must either be
|
|
|
|
* a JSON-RPC reply that contains an array of operation replies (which can be
|
|
|
|
* errors), or a JSON-RPC error whose "error" member is simply "canceled". */
|
2017-12-28 13:21:11 -08:00
|
|
|
static void
|
2017-12-31 21:15:58 -08:00
|
|
|
trigger_convert_error(struct ovsdb_trigger *t, struct ovsdb_error *error)
|
2017-12-28 13:21:11 -08:00
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
ovs_assert(!strcmp(t->request->method, "convert"));
|
|
|
|
ovs_assert(error && !t->reply);
|
|
|
|
t->reply = jsonrpc_create_error(
|
2017-12-28 13:21:11 -08:00
|
|
|
ovsdb_error_to_json_free(error), t->request->id);
|
2017-12-31 21:15:58 -08:00
|
|
|
ovsdb_trigger_complete(t);
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
trigger_success(struct ovsdb_trigger *t, struct json *result)
|
|
|
|
{
|
2017-12-31 21:15:58 -08:00
|
|
|
ovs_assert(result && !t->reply);
|
|
|
|
t->reply = jsonrpc_create_reply(result, t->request->id);
|
|
|
|
ovsdb_trigger_complete(t);
|
2017-12-28 13:21:11 -08:00
|
|
|
}
|