2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

dpif-netdev: Add group rxq scheduling assignment type.

Add an rxq scheduling option that allows rxqs to be grouped
on a pmd based purely on their load.

The current default 'cycles' assignment sorts rxqs by measured
processing load and then assigns them to a list of round robin PMDs.
This helps to keep the rxqs that require most processing on different
cores but as it selects the PMDs in round robin order, it equally
distributes rxqs to PMDs.

'cycles' assignment has the advantage in that it separates the most
loaded rxqs from being on the same core but maintains the rxqs being
spread across a broad range of PMDs to mitigate against changes to
traffic pattern.

'cycles' assignment has the disadvantage that in order to make the
trade off between optimising for current traffic load and mitigating
against future changes, it tries to assign an equal amount of rxqs
per PMD in a round robin manner and this can lead to a less than optimal
balance of the processing load.

Now that PMD auto load balance can help mitigate future changes in
traffic patterns, a 'group' assignment can be used to assign rxqs based
on their measured cycles and the estimated running total of the PMDs.

In this case, there is no restriction about keeping equal number of
rxqs per PMD as it is purely load based.

This means that one PMD may have a group of low load rxqs assigned to it
while another PMD has one high load rxq assigned to it, as that is the
best balance of their measured loads across the PMDs.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Acked-by: Sunil Pai G <sunil.pai.g@intel.com>
Acked-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
This commit is contained in:
Kevin Traynor
2021-07-16 17:02:09 +01:00
committed by Ian Stokes
parent 4fb54652e0
commit 3dd050909a
5 changed files with 90 additions and 6 deletions

View File

@@ -226,7 +226,8 @@ struct pmd_auto_lb {
/* Strategies for assigning rxqs to PMD threads.
 *
 * Note: the scraped diff contained both the pre- and post-patch
 * SCHED_CYCLES lines; this is the resolved post-patch enum. */
enum sched_assignment_type {
    SCHED_ROUNDROBIN,  /* Round robin rxqs across non-isolated PMDs. */
    SCHED_CYCLES,      /* Default.*/
    SCHED_GROUP        /* Pick the lowest loaded PMD for each rxq. */
};
/* Datapath based on the network device interface from netdev.h.
@@ -4219,6 +4220,8 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
pmd_rxq_assign_type = SCHED_ROUNDROBIN;
} else if (!strcmp(pmd_rxq_assign, "cycles")) {
pmd_rxq_assign_type = SCHED_CYCLES;
} else if (!strcmp(pmd_rxq_assign, "group")) {
pmd_rxq_assign_type = SCHED_GROUP;
} else {
/* Default. */
VLOG_WARN("Unsupported rx queue to PMD assignment mode in "
@@ -5061,6 +5064,34 @@ compare_rxq_cycles(const void *a, const void *b)
}
}
/* Returns the non-isolated PMD on 'numa' with the smallest load, or NULL
 * if every PMD on the node is isolated.
 *
 * Load is measured as processing cycles when 'has_cyc' is true, otherwise
 * as the number of rxqs already assigned to the PMD. */
static struct sched_pmd *
sched_pmd_get_lowest(struct sched_numa *numa, bool has_cyc)
{
    struct sched_pmd *best = NULL;
    uint64_t best_load = UINT64_MAX;

    for (unsigned i = 0; i < numa->n_pmds; i++) {
        struct sched_pmd *candidate = &numa->pmds[i];
        uint64_t load;

        /* Isolated PMDs never take scheduled rxqs. */
        if (candidate->isolated) {
            continue;
        }
        load = has_cyc ? candidate->pmd_proc_cycles : candidate->n_rxq;
        if (load < best_load) {
            best_load = load;
            best = candidate;
        }
    }
    return best;
}
/*
* Returns the next pmd from the numa node.
*
@@ -5119,8 +5150,14 @@ sched_pmd_next_noniso_rr(struct sched_numa *numa, bool updown)
}
static struct sched_pmd *
sched_pmd_next(struct sched_numa *numa, enum sched_assignment_type algo)
sched_pmd_next(struct sched_numa *numa, enum sched_assignment_type algo,
bool has_proc)
{
if (algo == SCHED_GROUP) {
return sched_pmd_get_lowest(numa, has_proc);
}
/* By default RR the PMDs. */
return sched_pmd_next_noniso_rr(numa, algo == SCHED_CYCLES ? true : false);
}
@@ -5130,6 +5167,7 @@ get_assignment_type_string(enum sched_assignment_type algo)
switch (algo) {
case SCHED_ROUNDROBIN: return "roundrobin";
case SCHED_CYCLES: return "cycles";
case SCHED_GROUP: return "group";
default: return "Unknown";
}
}
@@ -5291,7 +5329,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
}
/* Select the PMD that should be used for this rxq. */
sched_pmd = sched_pmd_next(numa, algo);
sched_pmd = sched_pmd_next(numa, algo, proc_cycles ? true : false);
if (sched_pmd) {
VLOG(level, "Core %2u on numa node %d assigned port \'%s\' "
"rx queue %d%s.",