2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

dpif-netdev: Add per PMD sleep config.

Extend 'pmd-sleep-max' so that individual PMD thread cores may have a
specified max sleep request value.

Existing behaviour is maintained.

Any PMD thread core without a specific value will use the global value if
one is set; otherwise it defaults to no sleep.

To set PMD thread cores 8 and 9 to never request a load based sleep
and all other PMD thread cores to be able to request a max sleep of
50 usecs:

 $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0

To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs
and all other PMD thread cores to never request a sleep:

 $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100

'pmd-sleep-show' is updated to show the max sleep value for each PMD
thread.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Kevin Traynor
2023-12-14 11:15:15 +00:00
committed by Ilya Maximets
parent a095794bcc
commit 4cbbf56e6c
6 changed files with 644 additions and 50 deletions

View File

@@ -179,6 +179,11 @@ static struct odp_support dp_netdev_support = {
/* Time in uS to increment a pmd thread sleep time. */
#define PMD_SLEEP_INC_US 1
/* One parsed max sleep entry: a core id paired with its max sleep request.
 * A 'core_id' of UINT_MAX marks the entry that carries the datapath default
 * rather than a value for a specific core. */
struct pmd_sleep {
    unsigned int core_id;   /* Core the entry applies to, or UINT_MAX. */
    uint64_t max_sleep;     /* Max sleep request for that core. */
};
struct dpcls {
struct cmap_node node; /* Within dp_netdev_pmd_thread.classifiers */
odp_port_t in_port;
@@ -287,8 +292,8 @@ struct dp_netdev {
atomic_uint32_t emc_insert_min;
/* Enable collection of PMD performance metrics. */
atomic_bool pmd_perf_metrics;
/* Max load based sleep request. */
atomic_uint64_t pmd_max_sleep;
/* Default max load based sleep request. */
uint64_t pmd_max_sleep_default;
/* Enable the SMC cache from ovsdb config */
atomic_bool smc_enable_db;
@@ -326,6 +331,9 @@ struct dp_netdev {
/* Cpu mask for pin of pmd threads. */
char *pmd_cmask;
/* PMD max load based sleep request user string. */
char *max_sleep_list;
uint64_t last_tnl_conf_seq;
struct conntrack *conntrack;
@@ -1428,6 +1436,19 @@ dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
ds_destroy(&reply);
}
/* Appends the max sleep setting of a single PMD thread to 'reply'.
 *
 * 'core_id' and 'numa_id' identify the thread and 'pmd_max_sleep' is the
 * value printed as its max sleep, in us.  Nothing is appended when 'core_id'
 * is NON_PMD_CORE_ID. */
static void
pmd_info_show_sleep(struct ds *reply, unsigned core_id, int numa_id,
                    uint64_t pmd_max_sleep)
{
    if (core_id == NON_PMD_CORE_ID) {
        return;
    }
    /* 'core_id' is unsigned, so it is formatted with %u rather than %d. */
    ds_put_format(reply,
                  "pmd thread numa_id %d core_id %u:\n"
                  " max sleep: %4"PRIu64" us\n",
                  numa_id, core_id, pmd_max_sleep);
}
static void
dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
void *aux)
@@ -1442,9 +1463,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
unsigned int secs = 0;
unsigned long long max_secs = (PMD_INTERVAL_LEN * PMD_INTERVAL_MAX)
/ INTERVAL_USEC_TO_SEC;
uint64_t default_max_sleep = 0;
bool show_header = true;
uint64_t max_sleep;
ovs_mutex_lock(&dp_netdev_mutex);
@@ -1512,12 +1532,13 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
} else if (type == PMD_INFO_SLEEP_SHOW) {
if (show_header) {
atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep);
ds_put_format(&reply, "Default max sleep: %4"PRIu64" us",
default_max_sleep);
ds_put_cstr(&reply, "\n");
ds_put_format(&reply, "Default max sleep: %4"PRIu64" us\n",
dp->pmd_max_sleep_default);
show_header = false;
}
atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
pmd_info_show_sleep(&reply, pmd->core_id, pmd->numa_id,
max_sleep);
}
}
free(pmd_list);
@@ -1906,6 +1927,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
return error;
}
dp->max_sleep_list = NULL;
dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
*dpp = dp;
return 0;
@@ -2015,6 +2038,7 @@ dp_netdev_free(struct dp_netdev *dp)
dp_netdev_meter_destroy(dp);
free(dp->max_sleep_list);
free(dp->pmd_cmask);
free(CONST_CAST(char *, dp->name));
free(dp);
@@ -4847,6 +4871,209 @@ set_pmd_auto_lb(struct dp_netdev *dp, bool state, bool always_log)
}
}
/* Parses the user supplied 'max_sleep_list' string into an array of
 * 'struct pmd_sleep' entries stored through '*pmd_sleeps'.
 *
 * Each element of the list is either a bare number, which is recorded as the
 * datapath default under core id UINT_MAX, or a "core:value" pair.  Elements
 * that fail numeric parsing, or whose core id is not below UINT_MAX, are
 * skipped.  When a core id (or the default) appears more than once the last
 * occurrence wins.  Every stored value is capped at PMD_RCU_QUIESCE_INTERVAL.
 *
 * '*pmd_sleeps' is grown with xrealloc(), so the caller passes in a pointer
 * that is NULL (or a previously allocated array) and frees the result.
 *
 * Returns the number of entries in '*pmd_sleeps'; 0 if 'max_sleep_list' is
 * NULL. */
static int
parse_pmd_sleep_list(const char *max_sleep_list,
                     struct pmd_sleep **pmd_sleeps)
{
    char *cursor, *buf, *key, *value;
    int n_entries = 0;

    if (!max_sleep_list) {
        return 0;
    }

    /* The parser advances 'cursor', so keep 'buf' to free the copy later. */
    buf = xstrdup(max_sleep_list);
    cursor = buf;

    while (ofputil_parse_key_value(&cursor, &key, &value)) {
        uint64_t key_num, sleep_val;
        unsigned core;
        char *err;
        int slot;

        /* The key must be numeric in both accepted forms. */
        err = str_to_u64(key, &key_num);
        if (err) {
            free(err);
            continue;
        }

        if (value[0] != '\0') {
            /* "core:value" form: the key is a pmd core id. */
            err = str_to_u64(value, &sleep_val);
            if (err || key_num >= UINT_MAX) {
                free(err);
                continue;
            }
            core = (unsigned) key_num;
        } else {
            /* No value specified: the key itself is the dp default. */
            core = UINT_MAX;
            sleep_val = key_num;
        }

        /* Look for an existing entry for this core so it gets overwritten
         * instead of duplicated. */
        slot = 0;
        while (slot < n_entries && (*pmd_sleeps)[slot].core_id != core) {
            slot++;
        }

        if (slot == n_entries) {
            /* First time this core is seen, make room for a new entry. */
            *pmd_sleeps = xrealloc(*pmd_sleeps,
                                   (n_entries + 1) * sizeof **pmd_sleeps);
            n_entries++;
        }

        sleep_val = MIN(PMD_RCU_QUIESCE_INTERVAL, sleep_val);
        (*pmd_sleeps)[slot].core_id = core;
        (*pmd_sleeps)[slot].max_sleep = sleep_val;
    }

    free(buf);
    return n_entries;
}
/* Logs, at INFO level, the max sleep value 'pmd_max_sleep' (in us) for the
 * PMD thread identified by 'core_id' and 'numa_id'.  Nothing is logged when
 * 'core_id' is NON_PMD_CORE_ID. */
static void
log_pmd_sleep(unsigned core_id, int numa_id, uint64_t pmd_max_sleep)
{
    if (core_id == NON_PMD_CORE_ID) {
        return;
    }
    /* 'core_id' is unsigned, so it is formatted with %u rather than %d. */
    VLOG_INFO("PMD thread on numa_id: %d, core id: %2u, "
              "max sleep: %4"PRIu64" us.", numa_id, core_id, pmd_max_sleep);
}
/* Initializes 'pmd->max_sleep' from the configuration stored in 'dp'.
 *
 * The value is the entry for 'pmd->core_id' in 'dp->max_sleep_list' when one
 * exists, otherwise 'dp->pmd_max_sleep_default'.  The chosen value is also
 * logged through log_pmd_sleep(). */
static void
pmd_init_max_sleep(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
{
    struct pmd_sleep *entries = NULL;
    uint64_t chosen = dp->pmd_max_sleep_default;
    int n_entries = parse_pmd_sleep_list(dp->max_sleep_list, &entries);
    int idx = 0;

    /* Find the first entry, if any, that targets this pmd's core. */
    while (idx < n_entries && entries[idx].core_id != pmd->core_id) {
        idx++;
    }
    if (idx < n_entries) {
        chosen = entries[idx].max_sleep;
    }

    atomic_init(&pmd->max_sleep, chosen);
    log_pmd_sleep(pmd->core_id, pmd->numa_id, chosen);
    free(entries);
}
/* Updates 'max_sleep' of every PMD thread in 'dp->poll_threads'.
 *
 * A thread gets the value of the first of the 'num_vals' entries in
 * 'pmd_sleeps' whose core id equals its own, or 'dp->pmd_max_sleep_default'
 * when there is no such entry.  The thread with NON_PMD_CORE_ID is left
 * untouched.
 *
 * Returns true if the stored value of at least one thread was changed. */
static bool
assign_sleep_values_to_pmds(struct dp_netdev *dp, int num_vals,
                            struct pmd_sleep *pmd_sleeps)
{
    struct dp_netdev_pmd_thread *pmd;
    bool any_updated = false;

    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
        uint64_t wanted, current;
        int idx;

        if (pmd->core_id == NON_PMD_CORE_ID) {
            continue;
        }

        /* Start from the global value, then let a per-core entry override. */
        wanted = dp->pmd_max_sleep_default;
        for (idx = 0; idx < num_vals; idx++) {
            if (pmd_sleeps[idx].core_id == pmd->core_id) {
                wanted = pmd_sleeps[idx].max_sleep;
                break;
            }
        }

        /* Only store, and report a change, when the value differs. */
        atomic_read_relaxed(&pmd->max_sleep, &current);
        if (wanted == current) {
            continue;
        }
        atomic_store_relaxed(&pmd->max_sleep, wanted);
        any_updated = true;
    }

    return any_updated;
}
/* Logs the default max sleep of 'dp' followed by the current max sleep of
 * each thread returned by sorted_poll_thread_list(), in the order that
 * function provides them. */
static void
log_all_pmd_sleeps(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread **threads = NULL;
    size_t n_threads;
    size_t k;

    VLOG_INFO("Default PMD thread max sleep: %4"PRIu64" us.",
              dp->pmd_max_sleep_default);

    sorted_poll_thread_list(dp, &threads, &n_threads);
    for (k = 0; k < n_threads; k++) {
        struct dp_netdev_pmd_thread *thread = threads[k];
        uint64_t sleep_now;

        atomic_read_relaxed(&thread->max_sleep, &sleep_now);
        log_pmd_sleep(thread->core_id, thread->numa_id, sleep_now);
    }
    free(threads);
}
static bool
set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config)
{
const char *max_sleep_list = smap_get(config, "pmd-sleep-max");
struct pmd_sleep *pmd_sleeps = NULL;
uint64_t default_max_sleep = 0;
bool default_changed = false;
bool pmd_changed = false;
uint64_t pmd_maxsleep;
int num_vals = 0;
/* Check for deprecated 'pmd-maxsleep' value. */
pmd_maxsleep = smap_get_ullong(config, "pmd-maxsleep", UINT64_MAX);
if (pmd_maxsleep != UINT64_MAX && !max_sleep_list) {
VLOG_WARN_ONCE("pmd-maxsleep is deprecated. "
"Please use pmd-sleep-max instead.");
default_max_sleep = pmd_maxsleep;
}
/* Check if there is no change in string or value. */
if (!!dp->max_sleep_list == !!max_sleep_list) {
if (max_sleep_list
? nullable_string_is_equal(max_sleep_list, dp->max_sleep_list)
: default_max_sleep == dp->pmd_max_sleep_default) {
return false;
}
}
/* Free existing string and copy new one (if any). */
free(dp->max_sleep_list);
dp->max_sleep_list = nullable_xstrdup(max_sleep_list);
if (max_sleep_list) {
num_vals = parse_pmd_sleep_list(max_sleep_list, &pmd_sleeps);
/* Check if the user has set a global value. */
for (int i = 0; i < num_vals; i++) {
if (pmd_sleeps[i].core_id == UINT_MAX) {
default_max_sleep = pmd_sleeps[i].max_sleep;
break;
}
}
}
if (dp->pmd_max_sleep_default != default_max_sleep) {
dp->pmd_max_sleep_default = default_max_sleep;
default_changed = true;
}
pmd_changed = assign_sleep_values_to_pmds(dp, num_vals, pmd_sleeps);
free(pmd_sleeps);
return default_changed || pmd_changed;
}
/* Applies datapath configuration from the database. Some of the changes are
* actually applied in dpif_netdev_run(). */
static int
@@ -4864,7 +5091,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
uint64_t rebalance_intvl;
uint8_t cur_rebalance_load;
uint32_t rebalance_load, rebalance_improve;
uint64_t pmd_max_sleep, cur_pmd_max_sleep;
bool log_autolb = false;
enum sched_assignment_type pmd_rxq_assign_type;
static bool first_set_config = true;
@@ -5015,26 +5241,12 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
set_pmd_auto_lb(dp, autolb_state, log_autolb);
pmd_max_sleep = smap_get_ullong(other_config, "pmd-maxsleep", UINT64_MAX);
if (pmd_max_sleep != UINT64_MAX) {
VLOG_WARN("pmd-maxsleep is deprecated. "
"Please use pmd-sleep-max instead.");
} else {
pmd_max_sleep = 0;
bool sleep_changed = set_all_pmd_max_sleeps(dp, other_config);
if (first_set_config || sleep_changed) {
log_all_pmd_sleeps(dp);
}
pmd_max_sleep = smap_get_ullong(other_config, "pmd-sleep-max",
pmd_max_sleep);
pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
atomic_read_relaxed(&dp->pmd_max_sleep, &cur_pmd_max_sleep);
if (first_set_config || pmd_max_sleep != cur_pmd_max_sleep) {
atomic_store_relaxed(&dp->pmd_max_sleep, pmd_max_sleep);
VLOG_INFO("PMD max sleep request is %"PRIu64" usecs.", pmd_max_sleep);
VLOG_INFO("PMD load based sleeps are %s.",
pmd_max_sleep ? "enabled" : "disabled" );
}
first_set_config = false;
first_set_config = false;
return 0;
}
@@ -7063,7 +7275,7 @@ reload:
pmd_perf_start_iteration(s);
atomic_read_relaxed(&pmd->dp->smc_enable_db, &pmd->ctx.smc_enable_db);
atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep);
atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
for (i = 0; i < poll_cnt; i++) {
@@ -7650,6 +7862,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
hmap_init(&pmd->send_port_cache);
cmap_init(&pmd->tx_bonds);
pmd_init_max_sleep(dp, pmd);
/* Initialize DPIF function pointer to the default configured version. */
atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());