2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 09:58:01 +00:00

ovsdb: raft: Don't forward more than one command to the leader.

Every transaction has RAFT log prerequisites.  Even if transactions
are not related (because RAFT doesn't actually know what data it is
handling).  When leader writes a new record to a RAFT storage, it is
getting appended to the log right away and changes current 'eid',
i.e., changes prerequisites.  The leader will not try to write new
records until the current one is committed, because until then the
pre-check will be failing.

However, that is different for the follower.  Followers do not add
records to the RAFT log until the leader sends an append request back.
So, if there are multiple transactions pending on a follower, it will
create a command for each of them and prerequisites will be set to the
same values.  All these commands will be sent to the leader, but only
one can succeed at a time, because accepting one command immediately
changes prerequisites and all other commands become non-applicable.
So, out of N commands, 1 will succeed and N - 1 will fail.  The cluster
failure is a transient failure, so the follower will re-process all the
failed transactions and send them again.  1 will succeed and N - 2 will
fail.  And so on, until there are no more transactions.  In the end,
instead of processing N transactions, the follower is performing
N * (N - 1) / 2 transaction processing iterations.  That is consuming
a huge amount of CPU resources completely unnecessarily.

Since there is no real chance for multiple transactions from the same
follower to succeed, it's better to not send them in the first place.
This also eliminates prerequisite mismatch messages on a leader in
this particular case.

In a test with 30 parallel shell threads executing 12K transactions
total with separate ovsdb-client calls through the same follower there
is about 60% performance improvement.  The test takes ~100 seconds to
complete without this change and ~40 seconds with this change applied.
The new time is very close to what it takes to execute the same test
through the cluster leader.  The test can be found at the link below.

Note: prerequisite failures on a leader are still possible, but mostly
in a case of simultaneous transactions from different followers.  It's
a normal thing for a distributed database due to its nature.

Link: https://mail.openvswitch.org/pipermail/ovs-dev/2024-June/415167.html
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Ilya Maximets 2024-06-27 00:02:21 +02:00
parent 773b0fb593
commit f8ed13355d
5 changed files with 55 additions and 12 deletions

View File

@ -2307,12 +2307,55 @@ raft_get_eid(const struct raft *raft, uint64_t index)
return &raft->snap.eid; return &raft->snap.eid;
} }
const struct uuid * static const struct uuid *
raft_current_eid(const struct raft *raft) raft_current_eid(const struct raft *raft)
{ {
return raft_get_eid(raft, raft->log_end - 1); return raft_get_eid(raft, raft->log_end - 1);
} }
bool
raft_precheck_prereq(const struct raft *raft, const struct uuid *prereq)
{
if (!uuid_equals(raft_current_eid(raft), prereq)) {
VLOG_DBG("%s: prerequisites (" UUID_FMT ") "
"do not match current eid (" UUID_FMT ")",
__func__, UUID_ARGS(prereq),
UUID_ARGS(raft_current_eid(raft)));
return false;
}
/* Incomplete commands on a leader will not change the leader's current
* 'eid' on commit as they are already part of the leader's log. */
if (raft->role == RAFT_LEADER) {
return true;
}
/* Having incomplete commands on a follower means that the leader has
* these commands and they will change the prerequisites once added to
* the leader's log.
*
* There is a chance that all these commands will actually fail and the
* record with current prerequisites will in fact succeed, but, since
* these are our own commands, the chances are low. */
struct raft_command *cmd;
HMAP_FOR_EACH (cmd, hmap_node, &raft->commands) {
/* Skip commands that are already part of the log (have non-zero
* index) and ones that do not carry any data (have zero 'eid'),
* as they can't change prerequisites.
*
* Database will not re-run triggers unless the data changes or
* one of the data-carrying triggers completes. So, pre-check must
* not fail if there are no outstanding data-carrying commands. */
if (!cmd->index && !uuid_is_zero(&cmd->eid)) {
VLOG_DBG("%s: follower still has an incomplete command "
UUID_FMT, __func__, UUID_ARGS(&cmd->eid));
return false;
}
}
return true;
}
static struct raft_command * static struct raft_command *
raft_command_create_completed(enum raft_command_status status) raft_command_create_completed(enum raft_command_status status)
{ {

View File

@ -189,5 +189,5 @@ struct ovsdb_error *raft_store_snapshot(struct raft *,
void raft_take_leadership(struct raft *); void raft_take_leadership(struct raft *);
void raft_transfer_leadership(struct raft *, const char *reason); void raft_transfer_leadership(struct raft *, const char *reason);
const struct uuid *raft_current_eid(const struct raft *); bool raft_precheck_prereq(const struct raft *, const struct uuid *prereq);
#endif /* lib/raft.h */ #endif /* lib/raft.h */

View File

@ -661,11 +661,12 @@ ovsdb_storage_write_schema_change(struct ovsdb_storage *storage,
return w; return w;
} }
const struct uuid * bool
ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage) ovsdb_storage_precheck_prereq(const struct ovsdb_storage *storage,
const struct uuid *prereq)
{ {
if (!storage->raft) { if (!storage->raft) {
return NULL; return true;
} }
return raft_current_eid(storage->raft); return raft_precheck_prereq(storage->raft, prereq);
} }

View File

@ -96,6 +96,9 @@ struct ovsdb_storage *ovsdb_storage_open_standalone(const char *filename,
bool rw); bool rw);
struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *); struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *);
const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *); /* Checks that there is a chance for a record with specified prerequisites
* to be successfully written to the storage. */
bool ovsdb_storage_precheck_prereq(const struct ovsdb_storage *,
const struct uuid *prereq);
#endif /* ovsdb/storage.h */ #endif /* ovsdb/storage.h */

View File

@ -1277,11 +1277,7 @@ struct ovsdb_txn_progress {
bool bool
ovsdb_txn_precheck_prereq(const struct ovsdb *db) ovsdb_txn_precheck_prereq(const struct ovsdb *db)
{ {
const struct uuid *eid = ovsdb_storage_peek_last_eid(db->storage); return ovsdb_storage_precheck_prereq(db->storage, &db->prereq);
if (!eid) {
return true;
}
return uuid_equals(&db->prereq, eid);
} }
struct ovsdb_txn_progress * struct ovsdb_txn_progress *