2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

handlers: Fix handlers mapping.

The handler and CPU mapping in upcalls are incorrect, and this is
especially noticeable on systems with CPU isolation enabled.

Say we have a 12 core system where only every even number CPU is enabled
C0, C2, C4, C6, C8, C10

This means we will create an array of size 6 that will be sent to
kernel that is populated with sockets [S0, S1, S2, S3, S4, S5]

The problem is when the kernel does an upcall it checks the socket array
via the index of the CPU, effectively adding additional load on some
CPUs while leaving no work on other CPUs.

e.g.

C0  indexes to S0
C2  indexes to S2 (should be S1)
C4  indexes to S4 (should be S2)

Modulo of 6 (size of socket array) is applied, so we wrap back to S0
C6  indexes to S0 (should be S3)
C8  indexes to S2 (should be S4)
C10 indexes to S4 (should be S5)

Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
get no work assigned to them

This leads to the kernel to throw the following message:
"openvswitch: cpu_id mismatch with handler threads"

Instead we will send the kernel a corrected array of sockets whose size
is either the number of CPUs in the system or the largest core_id on the
system plus one, whichever is greater. This takes care of systems with
non-continuous core ids.

In the above example we would create a
corrected array in a round-robin (assuming prime bias) fashion as follows:
[S0, S1, S2, S3, S4, S5, S0, S1, S2, S3, S4, S5]

Fixes: b1e517bd2f81 ("dpif-netlink: Introduce per-cpu upcall dispatch.")
Co-authored-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Michael Santana <msantana@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Michael Santana 2022-08-09 03:18:15 -04:00 committed by Ilya Maximets
parent a5cacea5f9
commit 2803b3fb53
3 changed files with 57 additions and 7 deletions

View File

@ -801,14 +801,28 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
uint32_t n_upcall_pids)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
int largest_cpu_id = ovs_numa_get_largest_core_id();
struct dpif_netlink_dp request, reply;
struct ofpbuf *bufp;
int error;
int n_cores;
n_cores = count_cpu_cores();
ovs_assert(n_cores == n_upcall_pids);
VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
uint32_t *corrected;
int error, i, n_cores;
if (largest_cpu_id == OVS_NUMA_UNSPEC) {
largest_cpu_id = -1;
}
/* Some systems have non-continuous cpu core ids. count_total_cores()
* would return an accurate number, however, this number cannot be used.
* e.g. If the largest core_id of a system is cpu9, but the system only
* has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
* warning. With the MAX() in place in this example we send an array of
* size 10 and prevent the warning. This has no bearing on the number of
* threads created.
*/
n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
n_cores);
dpif_netlink_dp_init(&request);
request.cmd = OVS_DP_CMD_SET;
@ -817,7 +831,12 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
request.user_features = dpif->user_features |
OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
request.upcall_pids = upcall_pids;
corrected = xcalloc(n_cores, sizeof *corrected);
for (i = 0; i < n_cores; i++) {
corrected[i] = upcall_pids[i % n_upcall_pids];
}
request.upcall_pids = corrected;
request.n_upcall_pids = n_cores;
error = dpif_netlink_dp_transact(&request, &reply, &bufp);
@ -825,9 +844,10 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
dpif->user_features = reply.user_features;
ofpbuf_delete(bufp);
if (!dpif_netlink_upcall_per_cpu(dpif)) {
return -EOPNOTSUPP;
error = -EOPNOTSUPP;
}
}
free(corrected);
return error;
}

View File

@ -387,6 +387,35 @@ ovs_numa_get_n_cores_on_numa(int numa_id)
return OVS_CORE_UNSPEC;
}
/* Returns the largest core_id.
*
* Return OVS_CORE_UNSPEC, if core_id information is not found.
*
* Returning OVS_CORE_UNSPEC comes at a caveat. The caller function
* must remember to check the return value of this callee function
* against OVS_CORE_UNSPEC. OVS_CORE_UNSPEC is a positive integer
* INT_MAX, which the caller may interpret it as the largest
* core_id if it's not checking for it.
*/
unsigned
ovs_numa_get_largest_core_id(void)
{
struct cpu_core *core;
unsigned max_id = 0;
if (!found_numa_and_core) {
return OVS_CORE_UNSPEC;
}
HMAP_FOR_EACH (core, hmap_node, &all_cpu_cores) {
if (core->core_id > max_id) {
max_id = core->core_id;
}
}
return max_id;
}
static struct ovs_numa_dump *
ovs_numa_dump_create(void)
{

View File

@ -56,6 +56,7 @@ int ovs_numa_get_n_numas(void);
int ovs_numa_get_n_cores(void);
int ovs_numa_get_numa_id(unsigned core_id);
int ovs_numa_get_n_cores_on_numa(int numa_id);
unsigned ovs_numa_get_largest_core_id(void);
struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask);
struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n);