mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
handlers: Fix handlers mapping.
The handler and CPU mapping in upcalls are incorrect, and this is especially noticeable on systems with CPU isolation enabled. Say we have a 12 core system where only every even number CPU is enabled: C0, C2, C4, C6, C8, C10. This means we will create an array of size 6 that will be sent to the kernel that is populated with sockets [S0, S1, S2, S3, S4, S5]. The problem is that when the kernel does an upcall it checks the socket array via the index of the CPU, effectively adding additional load on some CPUs while leaving no work on other CPUs. e.g. C0 indexes to S0; C2 indexes to S2 (should be S1); C4 indexes to S4 (should be S2). Modulo of 6 (size of socket array) is applied, so we wrap back to S0: C6 indexes to S0 (should be S3); C8 indexes to S2 (should be S4); C10 indexes to S4 (should be S5). Effectively, sockets S0, S2, S4 get overloaded while sockets S1, S3, S5 get no work assigned to them. This leads the kernel to throw the following message: "openvswitch: cpu_id mismatch with handler threads". Instead, we will send the kernel a corrected array of sockets the size of all CPUs in the system, or the largest core_id on the system, whichever is greater. This is to take care of systems with non-contiguous CPU core ids. In the above example we would create a corrected array in a round-robin (assuming prime bias) fashion as follows: [S0, S1, S2, S3, S4, S5, S6, S0, S1, S2, S3, S4] Fixes: b1e517bd2f81 ("dpif-netlink: Introduce per-cpu upcall dispatch.") Co-authored-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Michael Santana <msantana@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
parent
a5cacea5f9
commit
2803b3fb53
@ -801,14 +801,28 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
|
||||
uint32_t n_upcall_pids)
|
||||
{
|
||||
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
|
||||
int largest_cpu_id = ovs_numa_get_largest_core_id();
|
||||
struct dpif_netlink_dp request, reply;
|
||||
struct ofpbuf *bufp;
|
||||
int error;
|
||||
int n_cores;
|
||||
|
||||
n_cores = count_cpu_cores();
|
||||
ovs_assert(n_cores == n_upcall_pids);
|
||||
VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
|
||||
uint32_t *corrected;
|
||||
int error, i, n_cores;
|
||||
|
||||
if (largest_cpu_id == OVS_NUMA_UNSPEC) {
|
||||
largest_cpu_id = -1;
|
||||
}
|
||||
|
||||
/* Some systems have non-continuous cpu core ids. count_total_cores()
|
||||
* would return an accurate number, however, this number cannot be used.
|
||||
* e.g. If the largest core_id of a system is cpu9, but the system only
|
||||
* has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
|
||||
* warning. With the MAX() in place in this example we send an array of
|
||||
* size 10 and prevent the warning. This has no bearing on the number of
|
||||
* threads created.
|
||||
*/
|
||||
n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
|
||||
VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
|
||||
n_cores);
|
||||
|
||||
dpif_netlink_dp_init(&request);
|
||||
request.cmd = OVS_DP_CMD_SET;
|
||||
@ -817,7 +831,12 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
|
||||
request.user_features = dpif->user_features |
|
||||
OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
|
||||
|
||||
request.upcall_pids = upcall_pids;
|
||||
corrected = xcalloc(n_cores, sizeof *corrected);
|
||||
|
||||
for (i = 0; i < n_cores; i++) {
|
||||
corrected[i] = upcall_pids[i % n_upcall_pids];
|
||||
}
|
||||
request.upcall_pids = corrected;
|
||||
request.n_upcall_pids = n_cores;
|
||||
|
||||
error = dpif_netlink_dp_transact(&request, &reply, &bufp);
|
||||
@ -825,9 +844,10 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
|
||||
dpif->user_features = reply.user_features;
|
||||
ofpbuf_delete(bufp);
|
||||
if (!dpif_netlink_upcall_per_cpu(dpif)) {
|
||||
return -EOPNOTSUPP;
|
||||
error = -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
free(corrected);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -387,6 +387,35 @@ ovs_numa_get_n_cores_on_numa(int numa_id)
|
||||
return OVS_CORE_UNSPEC;
|
||||
}
|
||||
|
||||
/* Returns the largest core_id.
|
||||
*
|
||||
* Return OVS_CORE_UNSPEC, if core_id information is not found.
|
||||
*
|
||||
* Returning OVS_CORE_UNSPEC comes at a caveat. The caller function
|
||||
* must remember to check the return value of this callee function
|
||||
* against OVS_CORE_UNSPEC. OVS_CORE_UNSPEC is a positive integer
|
||||
* INT_MAX, which the caller may interpret it as the largest
|
||||
* core_id if it's not checking for it.
|
||||
*/
|
||||
unsigned
|
||||
ovs_numa_get_largest_core_id(void)
|
||||
{
|
||||
struct cpu_core *core;
|
||||
unsigned max_id = 0;
|
||||
|
||||
if (!found_numa_and_core) {
|
||||
return OVS_CORE_UNSPEC;
|
||||
}
|
||||
|
||||
HMAP_FOR_EACH (core, hmap_node, &all_cpu_cores) {
|
||||
if (core->core_id > max_id) {
|
||||
max_id = core->core_id;
|
||||
}
|
||||
}
|
||||
|
||||
return max_id;
|
||||
}
|
||||
|
||||
static struct ovs_numa_dump *
|
||||
ovs_numa_dump_create(void)
|
||||
{
|
||||
|
@ -56,6 +56,7 @@ int ovs_numa_get_n_numas(void);
|
||||
int ovs_numa_get_n_cores(void);
|
||||
int ovs_numa_get_numa_id(unsigned core_id);
|
||||
int ovs_numa_get_n_cores_on_numa(int numa_id);
|
||||
unsigned ovs_numa_get_largest_core_id(void);
|
||||
struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
|
||||
struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask);
|
||||
struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n);
|
||||
|
Loading…
x
Reference in New Issue
Block a user