dpif-netdev: Report overhead busy cycles per pmd.
Users complained that per rxq pmd usage was confusing: summing those
values per pmd would never reach 100% even when increasing the traffic
load beyond pmd capacity. This is because the dpif-netdev/pmd-rxq-show
command only reports "pure" rxq cycles, while some cycles are used in
the pmd mainloop and add up to the total pmd load.

dpif-netdev/pmd-stats-show does report per pmd load usage. This load is
measured since the last dpif-netdev/pmd-stats-clear call. On the other
hand, the per rxq pmd usage reflects the pmd load on a 10s sliding
window, which makes it non-trivial to correlate the two.

Gather per pmd busy cycles with the same periodicity and report the
difference as overhead in dpif-netdev/pmd-rxq-show so that we have all
info in a single command.

Example:

$ ovs-appctl dpif-netdev/pmd-rxq-show
pmd thread numa_id 1 core_id 3:
  isolated : true
  port: dpdk0             queue-id:  0 (enabled)   pmd usage: 90 %
  overhead:  4 %
pmd thread numa_id 1 core_id 5:
  isolated : false
  port: vhost0            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost1            queue-id:  0 (enabled)   pmd usage: 93 %
  port: vhost2            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost6            queue-id:  0 (enabled)   pmd usage:  0 %
  overhead:  6 %
pmd thread numa_id 1 core_id 31:
  isolated : true
  port: dpdk1             queue-id:  0 (enabled)   pmd usage: 86 %
  overhead:  4 %
pmd thread numa_id 1 core_id 33:
  isolated : false
  port: vhost3            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost4            queue-id:  0 (enabled)   pmd usage:  0 %
  port: vhost5            queue-id:  0 (enabled)   pmd usage: 92 %
  port: vhost7            queue-id:  0 (enabled)   pmd usage:  0 %
  overhead:  7 %

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
committed by Ian Stokes
parent 30bfba0249
commit 3222a89d9a
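In essence, the new statistic is the share of busy cycles that cannot be
attributed to any polled rxq. Below is a minimal sketch of that arithmetic
using the core_id 3 figures from the example above; the fixed inputs and
variable names are illustrative only, since the real code sums per-interval
ring buffers, as the diff that follows shows.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Illustrative totals over the ~1 minute reporting window. */
    uint64_t total_cycles = 1000;    /* all TSC cycles elapsed on the core */
    uint64_t busy_cycles = 940;      /* cycles where the pmd was not idle */
    uint64_t rxq_proc_cycles = 900;  /* cycles attributed to polled rxqs */
    uint64_t overhead_cycles = 0;

    /* Same guard as the patch: the counters are sampled at slightly
     * different instants, so never let the subtraction underflow. */
    if (rxq_proc_cycles < busy_cycles) {
        overhead_cycles = busy_cycles - rxq_proc_cycles;
    }

    printf("pmd usage: %2"PRIu64" %%\n", rxq_proc_cycles * 100 / total_cycles);
    printf("overhead:  %2"PRIu64" %%\n", overhead_cycles * 100 / total_cycles);
    return 0;
}

Compiled standalone, this prints a pmd usage of 90 % and an overhead of 4 %,
matching the first pmd of the example.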
@@ -195,6 +195,11 @@ queue::
 due to traffic pattern or reconfig changes, will take one minute to be fully
 reflected in the stats.
 
+.. versionchanged:: 2.16.0
+
+   An ``overhead`` statistic is shown per PMD: it represents the number of
+   cycles inherently consumed by the OVS PMD processing loop.
+
 Rx queue to PMD assignment takes place whenever there are configuration changes
 or can be triggered by using::
 
@@ -99,13 +99,18 @@ struct dp_netdev_pmd_thread {
     long long int next_optimization;
     /* End of the next time interval for which processing cycles
        are stored for each polled rxq. */
-    long long int rxq_next_cycle_store;
+    long long int next_cycle_store;
 
     /* Last interval timestamp. */
     uint64_t intrvl_tsc_prev;
     /* Last interval cycles. */
     atomic_ullong intrvl_cycles;
 
+    /* Write index for 'busy_cycles_intrvl'. */
+    unsigned int intrvl_idx;
+    /* Busy cycles in last PMD_INTERVAL_MAX intervals. */
+    atomic_ullong *busy_cycles_intrvl;
+
     /* Current context of the PMD thread. */
     struct dp_netdev_pmd_thread_ctx ctx;
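The two fields added above form a fixed-size ring buffer: the pmd thread
overwrites one slot per interval, and the stats handler sums all
PMD_INTERVAL_MAX slots. Here is a hedged, self-contained sketch of that
pattern, using standard C11 atomics in place of OVS's
atomic_store_relaxed()/atomic_read_relaxed() wrappers; main() is only an
illustrative driver.

#include <stdatomic.h>
#include <stdio.h>

#define PMD_INTERVAL_MAX 6

static atomic_ullong busy_cycles_intrvl[PMD_INTERVAL_MAX];
static unsigned int intrvl_idx;

/* Writer side: called once per interval by the pmd thread. */
static void
store_interval(unsigned long long tot_proc)
{
    unsigned int idx = intrvl_idx++ % PMD_INTERVAL_MAX;

    atomic_store_explicit(&busy_cycles_intrvl[idx], tot_proc,
                          memory_order_relaxed);
}

/* Reader side: called by pmd-rxq-show to cover the whole window. */
static unsigned long long
sum_intervals(void)
{
    unsigned long long busy_cycles = 0;

    for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
        busy_cycles += atomic_load_explicit(&busy_cycles_intrvl[j],
                                            memory_order_relaxed);
    }
    return busy_cycles;
}

int
main(void)
{
    for (unsigned long long i = 1; i <= 8; i++) {
        store_interval(i * 100);    /* samples 3..8 survive the wrap */
    }
    printf("busy cycles over last %d intervals: %llu\n",
           PMD_INTERVAL_MAX, sum_intervals());
    return 0;
}

Relaxed ordering suffices here because each slot is an independent 64-bit
sample; a reader racing with the writer at worst mixes adjacent intervals,
which the multi-interval averaging tolerates.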
@@ -155,11 +155,11 @@ static struct odp_support dp_netdev_support = {
 
 /* Time in microseconds of the interval in which rxq processing cycles used
  * in rxq to pmd assignments is measured and stored. */
-#define PMD_RXQ_INTERVAL_LEN 10000000LL
+#define PMD_INTERVAL_LEN 10000000LL
 
 /* Number of intervals for which cycles are stored
  * and used during rxq to pmd assignment. */
-#define PMD_RXQ_INTERVAL_MAX 6
+#define PMD_INTERVAL_MAX 6
 
 /* Time in microseconds to try RCU quiescing. */
 #define PMD_RCU_QUIESCE_INTERVAL 10000LL
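Note the window arithmetic implied by the renamed defines: PMD_INTERVAL_LEN
is expressed in microseconds, so the reporting window spans
PMD_INTERVAL_MAX * PMD_INTERVAL_LEN = 6 * 10,000,000 us = 60 s, which is why
the documentation hunk above states that changes take one minute to be fully
reflected in the stats.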
@@ -379,9 +379,9 @@ struct dp_netdev_rxq {
 
     /* Counters of cycles spent successfully polling and processing pkts. */
     atomic_ullong cycles[RXQ_N_CYCLES];
-    /* We store PMD_RXQ_INTERVAL_MAX intervals of data for an rxq and then
+    /* We store PMD_INTERVAL_MAX intervals of data for an rxq and then
        sum them to yield the cycles used for an rxq. */
-    atomic_ullong cycles_intrvl[PMD_RXQ_INTERVAL_MAX];
+    atomic_ullong cycles_intrvl[PMD_INTERVAL_MAX];
 };
 
 /* A port in a netdev-based datapath. */
@@ -791,6 +791,8 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
         struct rxq_poll *list;
         size_t n_rxq;
         uint64_t total_cycles = 0;
+        uint64_t busy_cycles = 0;
+        uint64_t total_rxq_proc_cycles = 0;
 
         ds_put_format(reply,
                       "pmd thread numa_id %d core_id %u:\n  isolated : %s\n",
@@ -803,16 +805,27 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
         /* Get the total pmd cycles for an interval. */
         atomic_read_relaxed(&pmd->intrvl_cycles, &total_cycles);
         /* Estimate the cycles to cover all intervals. */
-        total_cycles *= PMD_RXQ_INTERVAL_MAX;
+        total_cycles *= PMD_INTERVAL_MAX;
+
+        for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
+            uint64_t cycles;
+
+            atomic_read_relaxed(&pmd->busy_cycles_intrvl[j], &cycles);
+            busy_cycles += cycles;
+        }
+        if (busy_cycles > total_cycles) {
+            busy_cycles = total_cycles;
+        }
 
         for (int i = 0; i < n_rxq; i++) {
             struct dp_netdev_rxq *rxq = list[i].rxq;
             const char *name = netdev_rxq_get_name(rxq->rx);
-            uint64_t proc_cycles = 0;
+            uint64_t rxq_proc_cycles = 0;
 
-            for (int j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
-                proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
+            for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
+                rxq_proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
             }
+            total_rxq_proc_cycles += rxq_proc_cycles;
             ds_put_format(reply, "  port: %-16s  queue-id: %2d", name,
                           netdev_rxq_get_queue_id(list[i].rxq->rx));
             ds_put_format(reply, " %s", netdev_rxq_enabled(list[i].rxq->rx)
@@ -820,13 +833,30 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
             ds_put_format(reply, "  pmd usage: ");
             if (total_cycles) {
                 ds_put_format(reply, "%2"PRIu64"",
-                              proc_cycles * 100 / total_cycles);
+                              rxq_proc_cycles * 100 / total_cycles);
                 ds_put_cstr(reply, " %");
             } else {
                 ds_put_format(reply, "%s", "NOT AVAIL");
             }
             ds_put_cstr(reply, "\n");
         }
+
+        if (n_rxq > 0) {
+            ds_put_cstr(reply, "  overhead: ");
+            if (total_cycles) {
+                uint64_t overhead_cycles = 0;
+
+                if (total_rxq_proc_cycles < busy_cycles) {
+                    overhead_cycles = busy_cycles - total_rxq_proc_cycles;
+                }
+                ds_put_format(reply, "%2"PRIu64" %%",
+                              overhead_cycles * 100 / total_cycles);
+            } else {
+                ds_put_cstr(reply, "NOT AVAIL");
+            }
+            ds_put_cstr(reply, "\n");
+        }
+
         ovs_mutex_unlock(&pmd->port_mutex);
         free(list);
     }
@@ -4521,7 +4551,7 @@ static void
 dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
                                 unsigned long long cycles)
 {
-    unsigned int idx = rx->intrvl_idx++ % PMD_RXQ_INTERVAL_MAX;
+    unsigned int idx = rx->intrvl_idx++ % PMD_INTERVAL_MAX;
 
     atomic_store_relaxed(&rx->cycles_intrvl[idx], cycles);
 }
@@ -4978,7 +5008,7 @@ sched_numa_list_assignments(struct sched_numa_list *numa_list,
             struct sched_pmd *sched_pmd;
             uint64_t proc_cycles = 0;
 
-            for (int i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+            for (int i = 0; i < PMD_INTERVAL_MAX; i++) {
                 proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
             }
 
@@ -5238,7 +5268,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
             uint64_t cycle_hist = 0;
 
             /* Sum the queue intervals and store the cycle history. */
-            for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+            for (unsigned i = 0; i < PMD_INTERVAL_MAX; i++) {
                 cycle_hist += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
             }
             dp_netdev_rxq_set_cycles(rxq, RXQ_CYCLES_PROC_HIST,
@@ -5418,7 +5448,7 @@ sched_numa_list_variance(struct sched_numa_list *numa_list)
 
             if (total_cycles) {
                 /* Estimate the cycles to cover all intervals. */
-                total_cycles *= PMD_RXQ_INTERVAL_MAX;
+                total_cycles *= PMD_INTERVAL_MAX;
                 percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100)
                                              / total_cycles;
             } else {
@@ -5935,7 +5965,7 @@ dpif_netdev_run(struct dpif *dpif)
             pmd_alb->rebalance_poll_timer = now;
             CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
                 if (atomic_count_get(&pmd->pmd_overloaded) >=
-                                    PMD_RXQ_INTERVAL_MAX) {
+                                    PMD_INTERVAL_MAX) {
                     pmd_rebalance = true;
                     break;
                 }
@@ -6145,6 +6175,10 @@ reload:
 
     pmd->intrvl_tsc_prev = 0;
     atomic_store_relaxed(&pmd->intrvl_cycles, 0);
+    for (i = 0; i < PMD_INTERVAL_MAX; i++) {
+        atomic_store_relaxed(&pmd->busy_cycles_intrvl[i], 0);
+    }
+    pmd->intrvl_idx = 0;
     cycles_counter_update(s);
 
     pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
@@ -6677,7 +6711,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd_thread_ctx_time_update(pmd);
     pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
     pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
-    pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
+    pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
+    pmd->busy_cycles_intrvl = xzalloc(PMD_INTERVAL_MAX *
+                                      sizeof *pmd->busy_cycles_intrvl);
     hmap_init(&pmd->poll_list);
     hmap_init(&pmd->tx_ports);
     hmap_init(&pmd->tnl_port_cache);
@@ -6716,6 +6752,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     hmap_destroy(&pmd->tx_ports);
     cmap_destroy(&pmd->tx_bonds);
     hmap_destroy(&pmd->poll_list);
+    free(pmd->busy_cycles_intrvl);
     /* All flows (including their dpcls_rules) have been deleted already */
     CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
         dpcls_destroy(cls);
@@ -8992,21 +9029,22 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
     uint64_t tot_idle = 0, tot_proc = 0;
     unsigned int pmd_load = 0;
 
-    if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
+    if (pmd->ctx.now > pmd->next_cycle_store) {
         uint64_t curr_tsc;
         uint8_t rebalance_load_trigger;
         struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb;
-        if (pmd_alb->is_enabled && !pmd->isolated
-            && (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
-                pmd->prev_stats[PMD_CYCLES_ITER_IDLE])
-            && (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
-                pmd->prev_stats[PMD_CYCLES_ITER_BUSY]))
-            {
+        unsigned int idx;
+
+        if (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
+                pmd->prev_stats[PMD_CYCLES_ITER_IDLE] &&
+            pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
+                pmd->prev_stats[PMD_CYCLES_ITER_BUSY]) {
             tot_idle = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] -
                        pmd->prev_stats[PMD_CYCLES_ITER_IDLE];
             tot_proc = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] -
                        pmd->prev_stats[PMD_CYCLES_ITER_BUSY];
 
+            if (pmd_alb->is_enabled && !pmd->isolated) {
             if (tot_proc) {
                 pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc));
             }
@@ -9019,6 +9057,7 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
                 atomic_count_set(&pmd->pmd_overloaded, 0);
             }
+            }
         }
 
         pmd->prev_stats[PMD_CYCLES_ITER_IDLE] =
             pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE];
@@ -9039,9 +9078,11 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
             atomic_store_relaxed(&pmd->intrvl_cycles,
                                  curr_tsc - pmd->intrvl_tsc_prev);
         }
+        idx = pmd->intrvl_idx++ % PMD_INTERVAL_MAX;
+        atomic_store_relaxed(&pmd->busy_cycles_intrvl[idx], tot_proc);
         pmd->intrvl_tsc_prev = curr_tsc;
         /* Start new measuring interval */
-        pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
+        pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
     }
 
     if (pmd->ctx.now > pmd->next_optimization) {
@@ -73,6 +73,7 @@ AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0],
 pmd thread numa_id <cleared> core_id <cleared>:
   isolated : false
   port: p0                queue-id:  0 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 AT_CHECK([ovs-appctl dpif/show | sed 's/\(tx_queues=\)[[0-9]]*/\1<cleared>/g'], [0], [dnl
@@ -111,6 +112,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 OVS_VSWITCHD_STOP
@@ -142,6 +144,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
@@ -190,6 +193,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 OVS_VSWITCHD_STOP
@@ -221,6 +225,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 # Force cross-numa polling
@@ -285,6 +290,7 @@ pmd thread numa_id 1 core_id 1:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 pmd thread numa_id 0 core_id 2:
   isolated : false
 ])
@@ -306,6 +312,7 @@ pmd thread numa_id 1 core_id 1:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 pmd thread numa_id 0 core_id 2:
   isolated : false
 ])
@@ -325,6 +332,7 @@ pmd thread numa_id 1 core_id 1:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 pmd thread numa_id 0 core_id 2:
   isolated : false
 ])
@@ -345,6 +353,7 @@ pmd thread numa_id 1 core_id 0:
   port: p0                queue-id:  5 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  6 (enabled)   pmd usage: NOT AVAIL
   port: p0                queue-id:  7 (enabled)   pmd usage: NOT AVAIL
+  overhead: NOT AVAIL
 ])
 
 OVS_VSWITCHD_STOP