
dpif-netdev: Report overhead busy cycles per pmd.

Users complained that the per-rxq pmd usage was confusing: summing those
values per pmd never reaches 100%, even when the traffic load is pushed
beyond the pmd capacity.

This is because the dpif-netdev/pmd-rxq-show command only reports "pure"
rxq cycles, while some cycles are spent in the pmd mainloop and also add
up to the total pmd load.

dpif-netdev/pmd-stats-show does report a per-pmd load figure, but this
load is measured since the last dpif-netdev/pmd-stats-clear call. The per
rxq pmd usage, on the other hand, reflects the pmd load over a 10s sliding
window, which makes the two hard to correlate.

Gather the per-pmd busy cycles with the same periodicity and report the
part not attributed to any rxq as "overhead" in dpif-netdev/pmd-rxq-show,
so that all the information is available in a single command.
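
The reporting boils down to a subtraction with clamping: busy cycles that
cannot be attributed to any polled rxq are shown as overhead, and both
values are expressed as a percentage of the total cycles of the sliding
window. A minimal stand-alone C sketch of that arithmetic (illustrative
names, not necessarily the exact ones used in the patch):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Derive the "overhead" percentage from the total cycles of the window,
 * the cycles the pmd spent busy, and the cycles attributed to rx queues:
 * clamp the busy cycles to the window and report the part of them that
 * no rxq accounts for. */
static uint64_t
overhead_percent(uint64_t total_cycles, uint64_t busy_cycles,
                 uint64_t total_rxq_proc_cycles)
{
    uint64_t overhead_cycles = 0;

    if (!total_cycles) {
        return 0;    /* The real command prints "NOT AVAIL" here. */
    }
    if (busy_cycles > total_cycles) {
        busy_cycles = total_cycles;
    }
    if (total_rxq_proc_cycles < busy_cycles) {
        overhead_cycles = busy_cycles - total_rxq_proc_cycles;
    }
    return overhead_cycles * 100 / total_cycles;
}

int
main(void)
{
    /* Hypothetical numbers: a window of 1000 cycles, 940 busy cycles,
     * 900 of which were spent processing rx queues -> 4 % overhead. */
    printf("overhead: %2"PRIu64" %%\n", overhead_percent(1000, 940, 900));
    return 0;
}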

Example:
$ ovs-appctl dpif-netdev/pmd-rxq-show
pmd thread numa_id 1 core_id 3:
  isolated : true
  port: dpdk0             queue-id:  0 (enabled)   pmd usage: 90 %
  overhead:  4 %
pmd thread numa_id 1 core_id 5:
  isolated : false
  port: vhost0            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost1            queue-id:  0 (enabled)   pmd usage: 93 %
  port: vhost2            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost6            queue-id:  0 (enabled)   pmd usage:  0 %
  overhead:  6 %
pmd thread numa_id 1 core_id 31:
  isolated : true
  port: dpdk1             queue-id:  0 (enabled)   pmd usage: 86 %
  overhead:  4 %
pmd thread numa_id 1 core_id 33:
  isolated : false
  port: vhost3            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost4            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost5            queue-id:  0 (enabled)   pmd usage: 92 %
  port: vhost7            queue-id:  0 (enabled)   pmd usage:  0 %
  overhead:  7 %

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
Author: David Marchand <david.marchand@redhat.com>
Date: 2021-07-16 18:21:16 +02:00
Committed by: Ian Stokes <ian.stokes@intel.com>
parent 30bfba0249
commit 3222a89d9a
4 changed files with 93 additions and 33 deletions

@@ -195,6 +195,11 @@ queue::
due to traffic pattern or reconfig changes, will take one minute to be fully
reflected in the stats.
.. versionchanged:: 2.16.0
   An ``overhead`` statistic is shown per PMD: it represents the number of
   cycles inherently consumed by the OVS PMD processing loop.
Rx queue to PMD assignment takes place whenever there are configuration changes
or can be triggered by using::

@@ -99,13 +99,18 @@ struct dp_netdev_pmd_thread {
long long int next_optimization;
/* End of the next time interval for which processing cycles
are stored for each polled rxq. */
long long int rxq_next_cycle_store;
long long int next_cycle_store;
/* Last interval timestamp. */
uint64_t intrvl_tsc_prev;
/* Last interval cycles. */
atomic_ullong intrvl_cycles;
/* Write index for 'busy_cycles_intrvl'. */
unsigned int intrvl_idx;
/* Busy cycles in last PMD_INTERVAL_MAX intervals. */
atomic_ullong *busy_cycles_intrvl;
/* Current context of the PMD thread. */
struct dp_netdev_pmd_thread_ctx ctx;
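
The two new fields form a small ring buffer: once per PMD_INTERVAL_LEN the
pmd thread stores the busy cycles of the elapsed interval at the current
write index, and the reporting side sums the last PMD_INTERVAL_MAX entries.
A minimal stand-alone sketch of that pattern, using plain integers instead
of the atomic_ullong of the real struct (the pmd thread writes while the
appctl handler reads, hence the relaxed atomics there) and illustrative
names:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define N_INTERVALS 6    /* Mirrors PMD_INTERVAL_MAX. */

/* Simplified stand-in for the per-pmd bookkeeping: a write index and a
 * fixed ring of busy-cycle samples, one per measurement interval. */
struct busy_ring {
    unsigned int idx;                 /* Write index. */
    uint64_t samples[N_INTERVALS];    /* Busy cycles per interval. */
};

/* Called once per elapsed interval: overwrite the oldest sample. */
static void
busy_ring_store(struct busy_ring *ring, uint64_t busy_cycles)
{
    ring->samples[ring->idx++ % N_INTERVALS] = busy_cycles;
}

/* Called when reporting: sum the samples covering the sliding window. */
static uint64_t
busy_ring_sum(const struct busy_ring *ring)
{
    uint64_t sum = 0;

    for (int i = 0; i < N_INTERVALS; i++) {
        sum += ring->samples[i];
    }
    return sum;
}

int
main(void)
{
    struct busy_ring ring = { 0 };

    /* Hypothetical busy-cycle counts for eight consecutive intervals:
     * the two oldest samples get overwritten. */
    for (int i = 1; i <= 8; i++) {
        busy_ring_store(&ring, i * 100);
    }
    printf("busy cycles over the window: %"PRIu64"\n", busy_ring_sum(&ring));
    return 0;
}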

@@ -155,11 +155,11 @@ static struct odp_support dp_netdev_support = {
/* Time in microseconds of the interval in which rxq processing cycles used
* in rxq to pmd assignments is measured and stored. */
#define PMD_RXQ_INTERVAL_LEN 10000000LL
#define PMD_INTERVAL_LEN 10000000LL
/* Number of intervals for which cycles are stored
* and used during rxq to pmd assignment. */
#define PMD_RXQ_INTERVAL_MAX 6
#define PMD_INTERVAL_MAX 6
/* Time in microseconds to try RCU quiescing. */
#define PMD_RCU_QUIESCE_INTERVAL 10000LL
@@ -379,9 +379,9 @@ struct dp_netdev_rxq {
/* Counters of cycles spent successfully polling and processing pkts. */
atomic_ullong cycles[RXQ_N_CYCLES];
/* We store PMD_RXQ_INTERVAL_MAX intervals of data for an rxq and then
/* We store PMD_INTERVAL_MAX intervals of data for an rxq and then
sum them to yield the cycles used for an rxq. */
atomic_ullong cycles_intrvl[PMD_RXQ_INTERVAL_MAX];
atomic_ullong cycles_intrvl[PMD_INTERVAL_MAX];
};
/* A port in a netdev-based datapath. */
@@ -791,6 +791,8 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
struct rxq_poll *list;
size_t n_rxq;
uint64_t total_cycles = 0;
uint64_t busy_cycles = 0;
uint64_t total_rxq_proc_cycles = 0;
ds_put_format(reply,
"pmd thread numa_id %d core_id %u:\n isolated : %s\n",
@@ -803,16 +805,27 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
/* Get the total pmd cycles for an interval. */
atomic_read_relaxed(&pmd->intrvl_cycles, &total_cycles);
/* Estimate the cycles to cover all intervals. */
total_cycles *= PMD_RXQ_INTERVAL_MAX;
total_cycles *= PMD_INTERVAL_MAX;
for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
uint64_t cycles;
atomic_read_relaxed(&pmd->busy_cycles_intrvl[j], &cycles);
busy_cycles += cycles;
}
if (busy_cycles > total_cycles) {
busy_cycles = total_cycles;
}
for (int i = 0; i < n_rxq; i++) {
struct dp_netdev_rxq *rxq = list[i].rxq;
const char *name = netdev_rxq_get_name(rxq->rx);
uint64_t proc_cycles = 0;
uint64_t rxq_proc_cycles = 0;
for (int j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
rxq_proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
}
total_rxq_proc_cycles += rxq_proc_cycles;
ds_put_format(reply, " port: %-16s queue-id: %2d", name,
netdev_rxq_get_queue_id(list[i].rxq->rx));
ds_put_format(reply, " %s", netdev_rxq_enabled(list[i].rxq->rx)
@@ -820,13 +833,30 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
ds_put_format(reply, " pmd usage: ");
if (total_cycles) {
ds_put_format(reply, "%2"PRIu64"",
proc_cycles * 100 / total_cycles);
rxq_proc_cycles * 100 / total_cycles);
ds_put_cstr(reply, " %");
} else {
ds_put_format(reply, "%s", "NOT AVAIL");
}
ds_put_cstr(reply, "\n");
}
if (n_rxq > 0) {
ds_put_cstr(reply, " overhead: ");
if (total_cycles) {
uint64_t overhead_cycles = 0;
if (total_rxq_proc_cycles < busy_cycles) {
overhead_cycles = busy_cycles - total_rxq_proc_cycles;
}
ds_put_format(reply, "%2"PRIu64" %%",
overhead_cycles * 100 / total_cycles);
} else {
ds_put_cstr(reply, "NOT AVAIL");
}
ds_put_cstr(reply, "\n");
}
ovs_mutex_unlock(&pmd->port_mutex);
free(list);
}
@@ -4521,7 +4551,7 @@ static void
dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
unsigned long long cycles)
{
unsigned int idx = rx->intrvl_idx++ % PMD_RXQ_INTERVAL_MAX;
unsigned int idx = rx->intrvl_idx++ % PMD_INTERVAL_MAX;
atomic_store_relaxed(&rx->cycles_intrvl[idx], cycles);
}
@@ -4978,7 +5008,7 @@ sched_numa_list_assignments(struct sched_numa_list *numa_list,
struct sched_pmd *sched_pmd;
uint64_t proc_cycles = 0;
for (int i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
for (int i = 0; i < PMD_INTERVAL_MAX; i++) {
proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
}
@@ -5238,7 +5268,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
uint64_t cycle_hist = 0;
/* Sum the queue intervals and store the cycle history. */
for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
for (unsigned i = 0; i < PMD_INTERVAL_MAX; i++) {
cycle_hist += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
}
dp_netdev_rxq_set_cycles(rxq, RXQ_CYCLES_PROC_HIST,
@@ -5418,7 +5448,7 @@ sched_numa_list_variance(struct sched_numa_list *numa_list)
if (total_cycles) {
/* Estimate the cycles to cover all intervals. */
total_cycles *= PMD_RXQ_INTERVAL_MAX;
total_cycles *= PMD_INTERVAL_MAX;
percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100)
/ total_cycles;
} else {
@@ -5935,7 +5965,7 @@ dpif_netdev_run(struct dpif *dpif)
pmd_alb->rebalance_poll_timer = now;
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
if (atomic_count_get(&pmd->pmd_overloaded) >=
PMD_RXQ_INTERVAL_MAX) {
PMD_INTERVAL_MAX) {
pmd_rebalance = true;
break;
}
@@ -6145,6 +6175,10 @@ reload:
pmd->intrvl_tsc_prev = 0;
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
for (i = 0; i < PMD_INTERVAL_MAX; i++) {
atomic_store_relaxed(&pmd->busy_cycles_intrvl[i], 0);
}
pmd->intrvl_idx = 0;
cycles_counter_update(s);
pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
@@ -6677,7 +6711,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
pmd_thread_ctx_time_update(pmd);
pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
pmd->busy_cycles_intrvl = xzalloc(PMD_INTERVAL_MAX *
sizeof *pmd->busy_cycles_intrvl);
hmap_init(&pmd->poll_list);
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
@@ -6716,6 +6752,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
hmap_destroy(&pmd->tx_ports);
cmap_destroy(&pmd->tx_bonds);
hmap_destroy(&pmd->poll_list);
free(pmd->busy_cycles_intrvl);
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
dpcls_destroy(cls);
@@ -8992,21 +9029,22 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
uint64_t tot_idle = 0, tot_proc = 0;
unsigned int pmd_load = 0;
if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
if (pmd->ctx.now > pmd->next_cycle_store) {
uint64_t curr_tsc;
uint8_t rebalance_load_trigger;
struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb;
if (pmd_alb->is_enabled && !pmd->isolated
&& (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
pmd->prev_stats[PMD_CYCLES_ITER_IDLE])
&& (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
pmd->prev_stats[PMD_CYCLES_ITER_BUSY]))
{
unsigned int idx;
if (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
pmd->prev_stats[PMD_CYCLES_ITER_IDLE] &&
pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
pmd->prev_stats[PMD_CYCLES_ITER_BUSY]) {
tot_idle = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] -
pmd->prev_stats[PMD_CYCLES_ITER_IDLE];
tot_proc = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] -
pmd->prev_stats[PMD_CYCLES_ITER_BUSY];
if (pmd_alb->is_enabled && !pmd->isolated) {
if (tot_proc) {
pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc));
}
@@ -9019,6 +9057,7 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
atomic_count_set(&pmd->pmd_overloaded, 0);
}
}
}
pmd->prev_stats[PMD_CYCLES_ITER_IDLE] =
pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE];
@@ -9039,9 +9078,11 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
atomic_store_relaxed(&pmd->intrvl_cycles,
curr_tsc - pmd->intrvl_tsc_prev);
}
idx = pmd->intrvl_idx++ % PMD_INTERVAL_MAX;
atomic_store_relaxed(&pmd->busy_cycles_intrvl[idx], tot_proc);
pmd->intrvl_tsc_prev = curr_tsc;
/* Start new measuring interval */
pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
}
if (pmd->ctx.now > pmd->next_optimization) {

@@ -73,6 +73,7 @@ AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0],
pmd thread numa_id <cleared> core_id <cleared>:
isolated : false
port: p0 queue-id: 0 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
AT_CHECK([ovs-appctl dpif/show | sed 's/\(tx_queues=\)[[0-9]]*/\1<cleared>/g'], [0], [dnl
@@ -111,6 +112,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP
@@ -142,6 +144,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
@@ -190,6 +193,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP
@@ -221,6 +225,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
# Force cross-numa polling
@@ -285,6 +290,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -306,6 +312,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -325,6 +332,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -345,6 +353,7 @@ pmd thread numa_id 1 core_id 0:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP