2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

dpif-netdev: Add group rxq scheduling assignment type.

Add an rxq scheduling option that allows rxqs to be grouped
on a pmd based purely on their load.

The current default 'cycles' assignment sorts rxqs by measured
processing load and then assigns them to a list of round robin PMDs.
This helps to keep the rxqs that require most processing on different
cores but as it selects the PMDs in round robin order, it equally
distributes rxqs to PMDs.

'cycles' assignment has the advantage in that it separates the most
loaded rxqs from being on the same core but maintains the rxqs being
spread across a broad range of PMDs to mitigate against changes to
traffic pattern.

'cycles' assignment has the disadvantage that in order to make the
trade off between optimising for current traffic load and mitigating
against future changes, it tries to assign an equal amount of rxqs
per PMD in a round robin manner and this can lead to a less than optimal
balance of the processing load.

Now that PMD auto load balance can help mitigate future changes in
traffic patterns, a 'group' assignment can be used to assign rxqs based
on their measured cycles and the estimated running total of the PMDs.

In this case, there is no restriction about keeping equal number of
rxqs per PMD as it is purely load based.

This means that one PMD may have a group of low load rxqs assigned to it
while another PMD has one high load rxq assigned to it, as that is the
best balance of their measured loads across the PMDs.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Acked-by: Sunil Pai G <sunil.pai.g@intel.com>
Acked-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
This commit is contained in:
Kevin Traynor
2021-07-16 17:02:09 +01:00
committed by Ian Stokes
parent 4fb54652e0
commit 3dd050909a
5 changed files with 90 additions and 6 deletions

View File

@@ -226,7 +226,8 @@ struct pmd_auto_lb {
/* Strategies for assigning rxqs to PMD threads.
 *
 * Note: the scraped diff contained both the pre- and post-patch
 * SCHED_CYCLES lines; this is the resolved post-patch enum. */
enum sched_assignment_type {
    SCHED_ROUNDROBIN,  /* Round robin rxqs across non-isolated PMDs. */
    SCHED_CYCLES,      /* Default.*/
    SCHED_GROUP        /* Pick the lowest loaded PMD for each rxq. */
};
/* Datapath based on the network device interface from netdev.h.
@@ -4219,6 +4220,8 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
pmd_rxq_assign_type = SCHED_ROUNDROBIN;
} else if (!strcmp(pmd_rxq_assign, "cycles")) {
pmd_rxq_assign_type = SCHED_CYCLES;
} else if (!strcmp(pmd_rxq_assign, "group")) {
pmd_rxq_assign_type = SCHED_GROUP;
} else {
/* Default. */
VLOG_WARN("Unsupported rx queue to PMD assignment mode in "
@@ -5061,6 +5064,34 @@ compare_rxq_cycles(const void *a, const void *b)
}
}
/* Returns the non-isolated PMD on 'numa' with the smallest load, or NULL
 * if every PMD on the node is isolated.
 *
 * Load is measured as processing cycles when 'has_cyc' is true, otherwise
 * as the number of rxqs already assigned to the PMD. */
static struct sched_pmd *
sched_pmd_get_lowest(struct sched_numa *numa, bool has_cyc)
{
    struct sched_pmd *best = NULL;
    uint64_t best_load = UINT64_MAX;

    for (unsigned i = 0; i < numa->n_pmds; i++) {
        struct sched_pmd *candidate = &numa->pmds[i];
        uint64_t load;

        /* Isolated PMDs never take scheduled rxqs. */
        if (candidate->isolated) {
            continue;
        }
        load = has_cyc ? candidate->pmd_proc_cycles : candidate->n_rxq;
        if (load < best_load) {
            best_load = load;
            best = candidate;
        }
    }
    return best;
}
/*
* Returns the next pmd from the numa node.
*
@@ -5119,8 +5150,14 @@ sched_pmd_next_noniso_rr(struct sched_numa *numa, bool updown)
}
static struct sched_pmd *
sched_pmd_next(struct sched_numa *numa, enum sched_assignment_type algo)
sched_pmd_next(struct sched_numa *numa, enum sched_assignment_type algo,
bool has_proc)
{
if (algo == SCHED_GROUP) {
return sched_pmd_get_lowest(numa, has_proc);
}
/* By default RR the PMDs. */
return sched_pmd_next_noniso_rr(numa, algo == SCHED_CYCLES ? true : false);
}
@@ -5130,6 +5167,7 @@ get_assignment_type_string(enum sched_assignment_type algo)
switch (algo) {
case SCHED_ROUNDROBIN: return "roundrobin";
case SCHED_CYCLES: return "cycles";
case SCHED_GROUP: return "group";
default: return "Unknown";
}
}
@@ -5291,7 +5329,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
}
/* Select the PMD that should be used for this rxq. */
sched_pmd = sched_pmd_next(numa, algo);
sched_pmd = sched_pmd_next(numa, algo, proc_cycles ? true : false);
if (sched_pmd) {
VLOG(level, "Core %2u on numa node %d assigned port \'%s\' "
"rx queue %d%s.",