2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-29 13:27:59 +00:00

dpif-netdev: Add parameters to configure PMD auto load balance.

Two important parts of how PMD auto load balance operates are how
loaded a core needs to be and how much improvement is estimated
before a PMD auto load balance can trigger.

Previously they were hardcoded to 95% loaded and 25% variance
improvement.

These default values may not be suitable for all use cases and
we may want to use a more (or less) aggressive rebalance, either
on the pmd load threshold or on the minimum variance improvement
threshold.

The defaults are not changed, but "pmd-auto-lb-load-threshold" and
"pmd-auto-lb-improvement-threshold" parameters are added to override
the defaults.

$ ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-load-threshold="70"
$ ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-improvement-threshold="20"

Signed-off-by: Christophe Fontaine <cfontain@redhat.com>
Co-Authored-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Acked-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ian Stokes <ian.stokes@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Christophe Fontaine 2020-12-21 13:10:31 +00:00 committed by Ilya Maximets
parent e4db0b69e2
commit 62ab5594c2
3 changed files with 79 additions and 10 deletions

3
NEWS
View File

@ -20,6 +20,9 @@ Post-v2.14.0
* Add generic IP protocol support to conntrack. With this change, all * Add generic IP protocol support to conntrack. With this change, all
non-UDP/TCP/ICMP traffic will be treated as general L3 non-UDP/TCP/ICMP traffic will be treated as general L3
traffic, i.e. using 3 tuples. traffic, i.e. using 3 tuples.
* Add parameters 'pmd-auto-lb-load-threshold' and
'pmd-auto-lb-improvement-threshold' to configure PMD auto load balance
behaviour.
- The environment variable OVS_UNBOUND_CONF, if set, is now used - The environment variable OVS_UNBOUND_CONF, if set, is now used
as the DNS resolver's (unbound) configuration file. as the DNS resolver's (unbound) configuration file.
- Linux datapath: - Linux datapath:

View File

@ -85,9 +85,9 @@
VLOG_DEFINE_THIS_MODULE(dpif_netdev); VLOG_DEFINE_THIS_MODULE(dpif_netdev);
/* Auto Load Balancing Defaults */ /* Auto Load Balancing Defaults */
#define ALB_ACCEPTABLE_IMPROVEMENT 25 #define ALB_IMPROVEMENT_THRESHOLD 25
#define ALB_PMD_LOAD_THRESHOLD 95 #define ALB_LOAD_THRESHOLD 95
#define ALB_PMD_REBALANCE_POLL_INTERVAL 1 /* 1 Min */ #define ALB_REBALANCE_INTERVAL 1 /* 1 Min */
#define MIN_TO_MSEC 60000 #define MIN_TO_MSEC 60000
#define FLOW_DUMP_MAX_BATCH 50 #define FLOW_DUMP_MAX_BATCH 50
@ -300,6 +300,8 @@ struct pmd_auto_lb {
bool is_enabled; /* Current status of Auto load balancing. */ bool is_enabled; /* Current status of Auto load balancing. */
uint64_t rebalance_intvl; uint64_t rebalance_intvl;
uint64_t rebalance_poll_timer; uint64_t rebalance_poll_timer;
uint8_t rebalance_improve_thresh;
atomic_uint8_t rebalance_load_thresh;
}; };
/* Datapath based on the network device interface from netdev.h. /* Datapath based on the network device interface from netdev.h.
@ -4204,6 +4206,7 @@ set_pmd_auto_lb(struct dp_netdev *dp)
unsigned int cnt = 0; unsigned int cnt = 0;
struct dp_netdev_pmd_thread *pmd; struct dp_netdev_pmd_thread *pmd;
struct pmd_auto_lb *pmd_alb = &dp->pmd_alb; struct pmd_auto_lb *pmd_alb = &dp->pmd_alb;
uint8_t rebalance_load_thresh;
bool enable_alb = false; bool enable_alb = false;
bool multi_rxq = false; bool multi_rxq = false;
@ -4233,9 +4236,16 @@ set_pmd_auto_lb(struct dp_netdev *dp)
if (pmd_alb->is_enabled != enable_alb) { if (pmd_alb->is_enabled != enable_alb) {
pmd_alb->is_enabled = enable_alb; pmd_alb->is_enabled = enable_alb;
if (pmd_alb->is_enabled) { if (pmd_alb->is_enabled) {
atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
&rebalance_load_thresh);
VLOG_INFO("PMD auto load balance is enabled " VLOG_INFO("PMD auto load balance is enabled "
"interval %"PRIu64" mins", "interval %"PRIu64" mins, "
pmd_alb->rebalance_intvl / MIN_TO_MSEC); "pmd load threshold %"PRIu8"%%, "
"improvement threshold %"PRIu8"%%",
pmd_alb->rebalance_intvl / MIN_TO_MSEC,
rebalance_load_thresh,
pmd_alb->rebalance_improve_thresh);
} else { } else {
pmd_alb->rebalance_poll_timer = 0; pmd_alb->rebalance_poll_timer = 0;
VLOG_INFO("PMD auto load balance is disabled"); VLOG_INFO("PMD auto load balance is disabled");
@ -4259,6 +4269,8 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
uint32_t insert_min, cur_min; uint32_t insert_min, cur_min;
uint32_t tx_flush_interval, cur_tx_flush_interval; uint32_t tx_flush_interval, cur_tx_flush_interval;
uint64_t rebalance_intvl; uint64_t rebalance_intvl;
uint8_t rebalance_load, cur_rebalance_load;
uint8_t rebalance_improve;
tx_flush_interval = smap_get_int(other_config, "tx-flush-interval", tx_flush_interval = smap_get_int(other_config, "tx-flush-interval",
DEFAULT_TX_FLUSH_INTERVAL); DEFAULT_TX_FLUSH_INTERVAL);
@ -4336,7 +4348,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
false); false);
rebalance_intvl = smap_get_int(other_config, "pmd-auto-lb-rebal-interval", rebalance_intvl = smap_get_int(other_config, "pmd-auto-lb-rebal-interval",
ALB_PMD_REBALANCE_POLL_INTERVAL); ALB_REBALANCE_INTERVAL);
/* Input is in min, convert it to msec. */ /* Input is in min, convert it to msec. */
rebalance_intvl = rebalance_intvl =
@ -4348,6 +4360,30 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
"%"PRIu64" mins\n", rebalance_intvl / MIN_TO_MSEC); "%"PRIu64" mins\n", rebalance_intvl / MIN_TO_MSEC);
} }
rebalance_improve = smap_get_int(other_config,
"pmd-auto-lb-improvement-threshold",
ALB_IMPROVEMENT_THRESHOLD);
if (rebalance_improve > 100) {
rebalance_improve = ALB_IMPROVEMENT_THRESHOLD;
}
if (rebalance_improve != pmd_alb->rebalance_improve_thresh) {
pmd_alb->rebalance_improve_thresh = rebalance_improve;
VLOG_INFO("PMD auto load balance improvement threshold set to "
"%"PRIu8"%%", rebalance_improve);
}
rebalance_load = smap_get_int(other_config, "pmd-auto-lb-load-threshold",
ALB_LOAD_THRESHOLD);
if (rebalance_load > 100) {
rebalance_load = ALB_LOAD_THRESHOLD;
}
atomic_read_relaxed(&pmd_alb->rebalance_load_thresh, &cur_rebalance_load);
if (rebalance_load != cur_rebalance_load) {
atomic_store_relaxed(&pmd_alb->rebalance_load_thresh,
rebalance_load);
VLOG_INFO("PMD auto load balance load threshold set to %"PRIu8"%%",
rebalance_load);
}
set_pmd_auto_lb(dp); set_pmd_auto_lb(dp);
return 0; return 0;
} }
@ -5676,7 +5712,7 @@ pmd_rebalance_dry_run(struct dp_netdev *dp)
improvement = improvement =
((curr_variance - new_variance) * 100) / curr_variance; ((curr_variance - new_variance) * 100) / curr_variance;
} }
if (improvement < ALB_ACCEPTABLE_IMPROVEMENT) { if (improvement < dp->pmd_alb.rebalance_improve_thresh) {
ret = false; ret = false;
} }
} }
@ -8711,6 +8747,7 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
if (pmd->ctx.now > pmd->rxq_next_cycle_store) { if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
uint64_t curr_tsc; uint64_t curr_tsc;
uint8_t rebalance_load_trigger;
struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb; struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb;
if (pmd_alb->is_enabled && !pmd->isolated if (pmd_alb->is_enabled && !pmd->isolated
&& (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >= && (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
@ -8727,7 +8764,9 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc)); pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc));
} }
if (pmd_load >= ALB_PMD_LOAD_THRESHOLD) { atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
&rebalance_load_trigger);
if (pmd_load >= rebalance_load_trigger) {
atomic_count_inc(&pmd->pmd_overloaded); atomic_count_inc(&pmd->pmd_overloaded);
} else { } else {
atomic_count_set(&pmd->pmd_overloaded, 0); atomic_count_set(&pmd->pmd_overloaded, 0);

View File

@ -653,8 +653,9 @@
type='{"type": "boolean"}'> type='{"type": "boolean"}'>
<p> <p>
Configures PMD Auto Load Balancing that allows automatic assignment of Configures PMD Auto Load Balancing that allows automatic assignment of
RX queues to PMDs if any of PMDs is overloaded (i.e. processing cycles RX queues to PMDs if any of PMDs is overloaded (i.e. its processing
> 95%). cycles >
<ref column="other_config" key="pmd-auto-lb-load-threshold"/>).
</p> </p>
<p> <p>
It uses current scheme of cycle based assignment of RX queues that It uses current scheme of cycle based assignment of RX queues that
@ -690,6 +691,32 @@
once in few hours or a day or a week. once in few hours or a day or a week.
</p> </p>
</column> </column>
<column name="other_config" key="pmd-auto-lb-load-threshold"
type='{"type": "integer", "minInteger": 0, "maxInteger": 100}'>
<p>
Specifies the minimum load (% of used cycles) that any non-isolated
PMD thread must reach before a PMD Auto Load Balance may be
triggered.
</p>
<p>
The default value is <code>95%</code>.
</p>
</column>
<column name="other_config" key="pmd-auto-lb-improvement-threshold"
type='{"type": "integer", "minInteger": 0, "maxInteger": 100}'>
<p>
Specifies the minimum evaluated % improvement in load distribution
across the non-isolated PMD threads that will allow a PMD Auto Load
Balance to occur.
</p>
<p>
Note, setting this parameter to 0 will always allow an auto load
balance to occur, regardless of the estimated improvement.
</p>
<p>
The default value is <code>25%</code>.
</p>
</column>
<column name="other_config" key="userspace-tso-enable" <column name="other_config" key="userspace-tso-enable"
type='{"type": "boolean"}'> type='{"type": "boolean"}'>
<p> <p>