mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
ovsdb: raft: Don't forward more than one command to the leader.
Every transaction has RAFT log prerequisites. Even if transactions are not related (because RAFT doesn't actually know what data it is handling). When leader writes a new record to a RAFT storage, it is getting appended to the log right away and changes current 'eid', i.e., changes prerequisites. The leader will not try to write new records until the current one is committed, because until then the pre-check will be failing. However, that is different for the follower. Followers do not add records to the RAFT log until the leader sends an append request back. So, if there are multiple transactions pending on a follower, it will create a command for each of them and prerequisites will be set to the same values. All these commands will be sent to the leader, but only one can succeed at a time, because accepting one command immediately changes prerequisites and all other commands become non-applicable. So, out of N commands, 1 will succeed and N - 1 will fail. The cluster failure is a transient failure, so the follower will re-process all the failed transactions and send them again. 1 will succeed and N - 2 will fail. And so on, until there are no more transactions. In the end, instead of processing N transactions, the follower is performing N * (N - 1) / 2 transaction processing iterations. That is consuming a huge amount of CPU resources completely unnecessarily. Since there is no real chance for multiple transactions from the same follower to succeed, it's better to not send them in the first place. This also eliminates prerequisite mismatch messages on a leader in this particular case. In a test with 30 parallel shell threads executing 12K transactions total with separate ovsdb-client calls through the same follower there is about 60% performance improvement. The test takes ~100 seconds to complete without this change and ~40 seconds with this change applied. The new time is very close to what it takes to execute the same test through the cluster leader. The test can be found at the link below. Note: prerequisite failures on a leader are still possible, but mostly in a case of simultaneous transactions from different followers. It's a normal thing for a distributed database due to its nature. Link: https://mail.openvswitch.org/pipermail/ovs-dev/2024-June/415167.html Acked-by: Dumitru Ceara <dceara@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
parent
773b0fb593
commit
f8ed13355d
45
ovsdb/raft.c
45
ovsdb/raft.c
@ -2307,12 +2307,55 @@ raft_get_eid(const struct raft *raft, uint64_t index)
|
||||
return &raft->snap.eid;
|
||||
}
|
||||
|
||||
const struct uuid *
|
||||
static const struct uuid *
|
||||
raft_current_eid(const struct raft *raft)
|
||||
{
|
||||
return raft_get_eid(raft, raft->log_end - 1);
|
||||
}
|
||||
|
||||
bool
|
||||
raft_precheck_prereq(const struct raft *raft, const struct uuid *prereq)
|
||||
{
|
||||
if (!uuid_equals(raft_current_eid(raft), prereq)) {
|
||||
VLOG_DBG("%s: prerequisites (" UUID_FMT ") "
|
||||
"do not match current eid (" UUID_FMT ")",
|
||||
__func__, UUID_ARGS(prereq),
|
||||
UUID_ARGS(raft_current_eid(raft)));
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Incomplete commands on a leader will not change the leader's current
|
||||
* 'eid' on commit as they are already part of the leader's log. */
|
||||
if (raft->role == RAFT_LEADER) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Having incomplete commands on a follower means that the leader has
|
||||
* these commands and they will change the prerequisites once added to
|
||||
* the leader's log.
|
||||
*
|
||||
* There is a chance that all these commands will actually fail and the
|
||||
* record with current prerequisites will in fact succeed, but, since
|
||||
* these are our own commands, the chances are low. */
|
||||
struct raft_command *cmd;
|
||||
HMAP_FOR_EACH (cmd, hmap_node, &raft->commands) {
|
||||
/* Skip commands that are already part of the log (have non-zero
|
||||
* index) and ones that do not carry any data (have zero 'eid'),
|
||||
* as they can't change prerequisites.
|
||||
*
|
||||
* Database will not re-run triggers unless the data changes or
|
||||
* one of the data-carrying triggers completes. So, pre-check must
|
||||
* not fail if there are no outstanding data-carrying commands. */
|
||||
if (!cmd->index && !uuid_is_zero(&cmd->eid)) {
|
||||
VLOG_DBG("%s: follower still has an incomplete command "
|
||||
UUID_FMT, __func__, UUID_ARGS(&cmd->eid));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct raft_command *
|
||||
raft_command_create_completed(enum raft_command_status status)
|
||||
{
|
||||
|
@ -189,5 +189,5 @@ struct ovsdb_error *raft_store_snapshot(struct raft *,
|
||||
void raft_take_leadership(struct raft *);
|
||||
void raft_transfer_leadership(struct raft *, const char *reason);
|
||||
|
||||
const struct uuid *raft_current_eid(const struct raft *);
|
||||
bool raft_precheck_prereq(const struct raft *, const struct uuid *prereq);
|
||||
#endif /* lib/raft.h */
|
||||
|
@ -661,11 +661,12 @@ ovsdb_storage_write_schema_change(struct ovsdb_storage *storage,
|
||||
return w;
|
||||
}
|
||||
|
||||
const struct uuid *
|
||||
ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage)
|
||||
bool
|
||||
ovsdb_storage_precheck_prereq(const struct ovsdb_storage *storage,
|
||||
const struct uuid *prereq)
|
||||
{
|
||||
if (!storage->raft) {
|
||||
return NULL;
|
||||
return true;
|
||||
}
|
||||
return raft_current_eid(storage->raft);
|
||||
return raft_precheck_prereq(storage->raft, prereq);
|
||||
}
|
||||
|
@ -96,6 +96,9 @@ struct ovsdb_storage *ovsdb_storage_open_standalone(const char *filename,
|
||||
bool rw);
|
||||
struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *);
|
||||
|
||||
const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *);
|
||||
/* Checks that there is a chance for a record with specified prerequisites
|
||||
* to be successfully written to the storage. */
|
||||
bool ovsdb_storage_precheck_prereq(const struct ovsdb_storage *,
|
||||
const struct uuid *prereq);
|
||||
|
||||
#endif /* ovsdb/storage.h */
|
||||
|
@ -1277,11 +1277,7 @@ struct ovsdb_txn_progress {
|
||||
bool
|
||||
ovsdb_txn_precheck_prereq(const struct ovsdb *db)
|
||||
{
|
||||
const struct uuid *eid = ovsdb_storage_peek_last_eid(db->storage);
|
||||
if (!eid) {
|
||||
return true;
|
||||
}
|
||||
return uuid_equals(&db->prereq, eid);
|
||||
return ovsdb_storage_precheck_prereq(db->storage, &db->prereq);
|
||||
}
|
||||
|
||||
struct ovsdb_txn_progress *
|
||||
|
Loading…
x
Reference in New Issue
Block a user