2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00

ovsdb raft: Precheck prereq before proposing commit.

In current OVSDB Raft design, when there are multiple transactions
pending, either from same server node or different nodes in the
cluster, only the first one can be successful at once, and following
ones will fail at the prerequisite check on leader node, because
the first one will update the expected prerequisite eid on leader
node, and the prerequisite used for proposing a commit has to be
committed eid, so it is not possible for a node to use the latest
prerequisite expected by the leader to propose a commit until the
latest transaction is committed by the leader and the committed_index
on the node has been updated.

The current implementation proposes the commit as soon as the transaction
is requested by the client, which results in continuous retries that
cause high CPU load and waste.

Particularly, even if all clients are using leader_only to connect to
only the leader, the prereq check failure still happens a lot when
a batch of transactions are pending on the leader node - the leader
node proposes a batch of commits using the same committed eid as
prerequisite and it updates the expected prereq as soon as the first
one is in progress, but it needs time to append to followers and wait
until majority replies to update the committed_index, which results in
continuous, useless retries of the following transactions proposed by
the leader itself.

This patch doesn't change the design but simply pre-checks whether the
current eid is the same as the prereq before proposing the commit, to
avoid wasting CPU cycles on both leader and followers. When clients use
leader_only mode, this patch completely eliminates the prereq check failures.

In scale test of OVN with 1k HVs and creating and binding 10k lports,
the patch resulted in 90% CPU cost reduction on leader and >80% CPU cost
reduction on followers. (The test was with leader election base time
set to 10000ms, because otherwise the test couldn't complete because
of the frequent leader re-election.)

This is just one of the related performance problems of the prereq
checking mechanism discussed at:

https://mail.openvswitch.org/pipermail/ovs-discuss/2019-February/048243.html
Signed-off-by: Han Zhou <hzhou8@ebay.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Han Zhou 2019-03-01 10:56:37 -08:00 committed by Ben Pfaff
parent 2a7be04ea9
commit 2cd62f75c1
8 changed files with 28 additions and 4 deletions

View File

@ -39,9 +39,6 @@ OVSDB Clustering To-do List
* Include index with monitor update? * Include index with monitor update?
* Back off when transaction fails to commit? Definitely back off until
the eid changes for prereq failures
* Testing with replication. * Testing with replication.
* Handling bad transactions in read_db(). (Kill the database?) * Handling bad transactions in read_db(). (Kill the database?)

View File

@ -1906,7 +1906,7 @@ raft_get_eid(const struct raft *raft, uint64_t index)
return &raft->snap.eid; return &raft->snap.eid;
} }
static const struct uuid * const struct uuid *
raft_current_eid(const struct raft *raft) raft_current_eid(const struct raft *raft)
{ {
return raft_get_eid(raft, raft->log_end - 1); return raft_get_eid(raft, raft->log_end - 1);

View File

@ -180,4 +180,5 @@ struct ovsdb_error *raft_store_snapshot(struct raft *,
void raft_take_leadership(struct raft *); void raft_take_leadership(struct raft *);
void raft_transfer_leadership(struct raft *, const char *reason); void raft_transfer_leadership(struct raft *, const char *reason);
const struct uuid *raft_current_eid(const struct raft *);
#endif /* lib/raft.h */ #endif /* lib/raft.h */

View File

@ -601,3 +601,12 @@ ovsdb_storage_write_schema_change(struct ovsdb_storage *storage,
} }
return w; return w;
} }
/* Returns the entry ID reported by raft_current_eid() for the Raft instance
 * backing 'storage', or NULL if 'storage' has no Raft instance.
 *
 * The returned pointer is whatever raft_current_eid() hands back; this
 * function does not copy it. */
const struct uuid *
ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage)
{
    return storage->raft ? raft_current_eid(storage->raft) : NULL;
}

View File

@ -91,4 +91,6 @@ struct ovsdb_storage *ovsdb_storage_open_standalone(const char *filename,
bool rw); bool rw);
struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *); struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *);
const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *);
#endif /* ovsdb/storage.h */ #endif /* ovsdb/storage.h */

View File

@ -1011,6 +1011,16 @@ struct ovsdb_txn_progress {
struct ovsdb_storage *storage; struct ovsdb_storage *storage;
}; };
/* Checks, before a commit is proposed, whether 'db''s prerequisite entry ID
 * matches the last entry ID that its storage reports.
 *
 * Returns true if ovsdb_storage_peek_last_eid() yields no entry ID (there is
 * nothing to compare against) or if that entry ID equals 'db->prereq'.
 * Returns false otherwise. */
bool
ovsdb_txn_precheck_prereq(const struct ovsdb *db)
{
    const struct uuid *last_eid = ovsdb_storage_peek_last_eid(db->storage);

    /* A null entry ID means the check does not apply, so let it pass. */
    return !last_eid || uuid_equals(&db->prereq, last_eid);
}
struct ovsdb_txn_progress * struct ovsdb_txn_progress *
ovsdb_txn_propose_schema_change(struct ovsdb *db, ovsdb_txn_propose_schema_change(struct ovsdb *db,
const struct json *schema, const struct json *schema,

View File

@ -29,6 +29,7 @@ void ovsdb_txn_set_txnid(const struct uuid *, struct ovsdb_txn *);
const struct uuid *ovsdb_txn_get_txnid(const struct ovsdb_txn *); const struct uuid *ovsdb_txn_get_txnid(const struct ovsdb_txn *);
void ovsdb_txn_abort(struct ovsdb_txn *); void ovsdb_txn_abort(struct ovsdb_txn *);
bool ovsdb_txn_precheck_prereq(const struct ovsdb *db);
struct ovsdb_error *ovsdb_txn_replay_commit(struct ovsdb_txn *) struct ovsdb_error *ovsdb_txn_replay_commit(struct ovsdb_txn *)
OVS_WARN_UNUSED_RESULT; OVS_WARN_UNUSED_RESULT;
struct ovsdb_txn_progress *ovsdb_txn_propose_commit(struct ovsdb_txn *, struct ovsdb_txn_progress *ovsdb_txn_propose_commit(struct ovsdb_txn *,

View File

@ -194,6 +194,10 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now)
struct ovsdb_txn *txn = NULL; struct ovsdb_txn *txn = NULL;
struct ovsdb *newdb = NULL; struct ovsdb *newdb = NULL;
if (!strcmp(t->request->method, "transact")) { if (!strcmp(t->request->method, "transact")) {
if (!ovsdb_txn_precheck_prereq(t->db)) {
return false;
}
bool durable; bool durable;
struct json *result; struct json *result;