mirror of
https://github.com/openvswitch/ovs
synced 2025-09-04 00:05:15 +00:00
dpif-netdev: Detailed performance stats for PMDs
This patch instruments the dpif-netdev datapath to record detailed statistics of what is happening in every iteration of a PMD thread. The collection of detailed statistics can be controlled by a new Open_vSwitch configuration parameter "other_config:pmd-perf-metrics". By default it is disabled. The run-time overhead, when enabled, is in the order of 1%. The covered metrics per iteration are: - cycles - packets - (rx) batches - packets/batch - max. vhostuser qlen - upcalls - cycles spent in upcalls This raw recorded data is used threefold: 1. In histograms for each of the following metrics: - cycles/iteration (log.) - packets/iteration (log.) - cycles/packet - packets/batch - max. vhostuser qlen (log.) - upcalls - cycles/upcall (log) The histogram bins are divided linearly or logarithmically. 2. A cyclic history of the above statistics for 999 iterations 3. A cyclic history of the cumulative/average values per millisecond wall clock for the last 1000 milliseconds: - number of iterations - avg. cycles/iteration - packets (Kpps) - avg. packets/batch - avg. max vhost qlen - upcalls - avg. cycles/upcall The gathered performance metrics can be printed at any time with the new CLI command ovs-appctl dpif-netdev/pmd-perf-show [-nh] [-it iter_len] [-ms ms_len] [-pmd core] [dp] The options are -nh: Suppress the histograms -it iter_len: Display the last iter_len iteration stats -ms ms_len: Display the last ms_len millisecond stats -pmd core: Display only the specified PMD The performance statistics are reset with the existing dpif-netdev/pmd-stats-clear command. 
The output always contains the following global PMD statistics, similar to the pmd-stats-show command: Time: 15:24:55.270 Measurement duration: 1.008 s pmd thread numa_id 0 core_id 1: Cycles: 2419034712 (2.40 GHz) Iterations: 572817 (1.76 us/it) - idle: 486808 (15.9 % cycles) - busy: 86009 (84.1 % cycles) Rx packets: 2399607 (2381 Kpps, 848 cycles/pkt) Datapath passes: 3599415 (1.50 passes/pkt) - EMC hits: 336472 ( 9.3 %) - Megaflow hits: 3262943 (90.7 %, 1.00 subtbl lookups/hit) - Upcalls: 0 ( 0.0 %, 0.0 us/upcall) - Lost upcalls: 0 ( 0.0 %) Tx packets: 2399607 (2381 Kpps) Tx batches: 171400 (14.00 pkts/batch) Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Acked-by: Billy O'Mahony <billy.o.mahony@intel.com> Signed-off-by: Ian Stokes <ian.stokes@intel.com>
This commit is contained in:
committed by
Ian Stokes
parent
8492adc270
commit
79f368756c
4
NEWS
4
NEWS
@@ -27,6 +27,10 @@ Post-v2.9.0
|
|||||||
- DPDK:
|
- DPDK:
|
||||||
* New 'check-dpdk' Makefile target to run a new system testsuite.
|
* New 'check-dpdk' Makefile target to run a new system testsuite.
|
||||||
See Testing topic for the details.
|
See Testing topic for the details.
|
||||||
|
- Userspace datapath:
|
||||||
|
* Commands ovs-appctl dpif-netdev/pmd-*-show can now work on a single PMD
|
||||||
|
* Detailed PMD performance metrics available with new command
|
||||||
|
ovs-appctl dpif-netdev/pmd-perf-show
|
||||||
|
|
||||||
v2.9.0 - 19 Feb 2018
|
v2.9.0 - 19 Feb 2018
|
||||||
--------------------
|
--------------------
|
||||||
|
@@ -492,6 +492,7 @@ MAN_FRAGMENTS += \
|
|||||||
lib/dpctl.man \
|
lib/dpctl.man \
|
||||||
lib/memory-unixctl.man \
|
lib/memory-unixctl.man \
|
||||||
lib/netdev-dpdk-unixctl.man \
|
lib/netdev-dpdk-unixctl.man \
|
||||||
|
lib/dpif-netdev-unixctl.man \
|
||||||
lib/ofp-version.man \
|
lib/ofp-version.man \
|
||||||
lib/ovs.tmac \
|
lib/ovs.tmac \
|
||||||
lib/service.man \
|
lib/service.man \
|
||||||
|
@@ -15,18 +15,333 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <config.h>
|
#include <config.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "dpif-netdev-perf.h"
|
||||||
#include "openvswitch/dynamic-string.h"
|
#include "openvswitch/dynamic-string.h"
|
||||||
#include "openvswitch/vlog.h"
|
#include "openvswitch/vlog.h"
|
||||||
#include "dpif-netdev-perf.h"
|
#include "ovs-thread.h"
|
||||||
#include "timeval.h"
|
#include "timeval.h"
|
||||||
|
|
||||||
VLOG_DEFINE_THIS_MODULE(pmd_perf);
|
VLOG_DEFINE_THIS_MODULE(pmd_perf);
|
||||||
|
|
||||||
|
/* Returns the TSC frequency in Hz.
 *
 * When built with DPDK_NETDEV the value is taken from rte_get_tsc_hz().
 * Otherwise a dummy value of 1 is returned; that is sufficient because this
 * function is only invoked from PMD threads, which depend on DPDK. */
static uint64_t
get_tsc_hz(void)
{
#ifdef DPDK_NETDEV
    return rte_get_tsc_hz();
#else
    return 1;
#endif
}
|
||||||
|
|
||||||
|
/* Histogram functions. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ovs_assert(min < max);
|
||||||
|
for (i = 0; i < NUM_BINS-1; i++) {
|
||||||
|
hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
|
||||||
|
}
|
||||||
|
hist->wall[NUM_BINS-1] = UINT32_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
|
||||||
|
{
|
||||||
|
int i, start, bins, wall;
|
||||||
|
double log_min, log_max;
|
||||||
|
|
||||||
|
ovs_assert(min < max);
|
||||||
|
if (min > 0) {
|
||||||
|
log_min = log(min);
|
||||||
|
log_max = log(max);
|
||||||
|
start = 0;
|
||||||
|
bins = NUM_BINS - 1;
|
||||||
|
} else {
|
||||||
|
hist->wall[0] = 0;
|
||||||
|
log_min = log(1);
|
||||||
|
log_max = log(max);
|
||||||
|
start = 1;
|
||||||
|
bins = NUM_BINS - 2;
|
||||||
|
}
|
||||||
|
wall = start;
|
||||||
|
for (i = 0; i < bins; i++) {
|
||||||
|
/* Make sure each wall is monotonically increasing. */
|
||||||
|
wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / (bins-1)));
|
||||||
|
hist->wall[start + i] = wall++;
|
||||||
|
}
|
||||||
|
if (hist->wall[NUM_BINS-2] < max) {
|
||||||
|
hist->wall[NUM_BINS-2] = max;
|
||||||
|
}
|
||||||
|
hist->wall[NUM_BINS-1] = UINT32_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
histogram_samples(const struct histogram *hist)
|
||||||
|
{
|
||||||
|
uint64_t samples = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_BINS; i++) {
|
||||||
|
samples += hist->bin[i];
|
||||||
|
}
|
||||||
|
return samples;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
histogram_clear(struct histogram *hist)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < NUM_BINS; i++) {
|
||||||
|
hist->bin[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
history_init(struct history *h)
|
||||||
|
{
|
||||||
|
memset(h, 0, sizeof(*h));
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
pmd_perf_stats_init(struct pmd_perf_stats *s)
|
pmd_perf_stats_init(struct pmd_perf_stats *s)
|
||||||
{
|
{
|
||||||
memset(s, 0 , sizeof(*s));
|
memset(s, 0, sizeof(*s));
|
||||||
|
ovs_mutex_init(&s->stats_mutex);
|
||||||
|
ovs_mutex_init(&s->clear_mutex);
|
||||||
|
/* Logarithmic histogram for cycles/it ranging from 500 to 24M
|
||||||
|
* (corresponding to 200 ns to 9.6 ms at 2.5 GHz TSC clock). */
|
||||||
|
histogram_walls_set_log(&s->cycles, 500, 24000000);
|
||||||
|
/* Logarithmic histogram for pkts/it ranging from 0 to 1000. */
|
||||||
|
histogram_walls_set_log(&s->pkts, 0, 1000);
|
||||||
|
/* Linear histogram for cycles/pkt ranging from 100 to 30K. */
|
||||||
|
histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
|
||||||
|
/* Linear histogram for pkts/rx batch ranging from 0 to 32,
|
||||||
|
* the maximum rx batch size in OVS. */
|
||||||
|
histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
|
||||||
|
/* Linear histogram for upcalls/it ranging from 0 to 30. */
|
||||||
|
histogram_walls_set_lin(&s->upcalls, 0, 30);
|
||||||
|
/* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M
|
||||||
|
* (corresponding to 400 ns to 400 us at 2.5 GHz TSC clock). */
|
||||||
|
histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
|
||||||
|
/* Log. histogram for max vhostuser queue fill level from 0 to 512.
|
||||||
|
* 512 is the maximum fill level for a virtio queue with 1024
|
||||||
|
* descriptors (maximum configurable length in Qemu), with the
|
||||||
|
* DPDK 17.11 virtio PMD in the guest. */
|
||||||
|
histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
|
||||||
|
s->iteration_cnt = 0;
|
||||||
|
s->start_ms = time_msec();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
|
||||||
|
double duration)
|
||||||
|
{
|
||||||
|
uint64_t stats[PMD_N_STATS];
|
||||||
|
double us_per_cycle = 1000000.0 / get_tsc_hz();
|
||||||
|
|
||||||
|
if (duration == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pmd_perf_read_counters(s, stats);
|
||||||
|
uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
|
||||||
|
stats[PMD_CYCLES_ITER_BUSY];
|
||||||
|
uint64_t rx_packets = stats[PMD_STAT_RECV];
|
||||||
|
uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS];
|
||||||
|
uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES];
|
||||||
|
uint64_t passes = stats[PMD_STAT_RECV] +
|
||||||
|
stats[PMD_STAT_RECIRC];
|
||||||
|
uint64_t upcalls = stats[PMD_STAT_MISS];
|
||||||
|
uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
|
||||||
|
uint64_t tot_iter = histogram_samples(&s->pkts);
|
||||||
|
uint64_t idle_iter = s->pkts.bin[0];
|
||||||
|
uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;
|
||||||
|
|
||||||
|
ds_put_format(str,
|
||||||
|
" Cycles: %12"PRIu64" (%.2f GHz)\n"
|
||||||
|
" Iterations: %12"PRIu64" (%.2f us/it)\n"
|
||||||
|
" - idle: %12"PRIu64" (%4.1f %% cycles)\n"
|
||||||
|
" - busy: %12"PRIu64" (%4.1f %% cycles)\n",
|
||||||
|
tot_cycles, (tot_cycles / duration) / 1E9,
|
||||||
|
tot_iter, tot_cycles * us_per_cycle / tot_iter,
|
||||||
|
idle_iter,
|
||||||
|
100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
|
||||||
|
busy_iter,
|
||||||
|
100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
|
||||||
|
if (rx_packets > 0) {
|
||||||
|
ds_put_format(str,
|
||||||
|
" Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n"
|
||||||
|
" Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n"
|
||||||
|
" - EMC hits: %12"PRIu64" (%4.1f %%)\n"
|
||||||
|
" - Megaflow hits: %12"PRIu64" (%4.1f %%, %.2f subtbl lookups/"
|
||||||
|
"hit)\n"
|
||||||
|
" - Upcalls: %12"PRIu64" (%4.1f %%, %.1f us/upcall)\n"
|
||||||
|
" - Lost upcalls: %12"PRIu64" (%4.1f %%)\n",
|
||||||
|
rx_packets, (rx_packets / duration) / 1000,
|
||||||
|
1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
|
||||||
|
passes, rx_packets ? 1.0 * passes / rx_packets : 0,
|
||||||
|
stats[PMD_STAT_EXACT_HIT],
|
||||||
|
100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
|
||||||
|
stats[PMD_STAT_MASKED_HIT],
|
||||||
|
100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
|
||||||
|
stats[PMD_STAT_MASKED_HIT]
|
||||||
|
? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT]
|
||||||
|
: 0,
|
||||||
|
upcalls, 100.0 * upcalls / passes,
|
||||||
|
upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
|
||||||
|
stats[PMD_STAT_LOST],
|
||||||
|
100.0 * stats[PMD_STAT_LOST] / passes);
|
||||||
|
} else {
|
||||||
|
ds_put_format(str, " Rx packets: %12d\n", 0);
|
||||||
|
}
|
||||||
|
if (tx_packets > 0) {
|
||||||
|
ds_put_format(str,
|
||||||
|
" Tx packets: %12"PRIu64" (%.0f Kpps)\n"
|
||||||
|
" Tx batches: %12"PRIu64" (%.2f pkts/batch)"
|
||||||
|
"\n",
|
||||||
|
tx_packets, (tx_packets / duration) / 1000,
|
||||||
|
tx_batches, 1.0 * tx_packets / tx_batches);
|
||||||
|
} else {
|
||||||
|
ds_put_format(str, " Tx packets: %12d\n\n", 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ds_put_cstr(str, "Histograms\n");
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
|
||||||
|
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
|
||||||
|
"max vhost qlen", "upcalls/it", "cycles/upcall");
|
||||||
|
for (i = 0; i < NUM_BINS-1; i++) {
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
|
||||||
|
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
|
||||||
|
" %-9d %-11"PRIu64"\n",
|
||||||
|
s->cycles.wall[i], s->cycles.bin[i],
|
||||||
|
s->pkts.wall[i],s->pkts.bin[i],
|
||||||
|
s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
|
||||||
|
s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
|
||||||
|
s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
|
||||||
|
s->upcalls.wall[i], s->upcalls.bin[i],
|
||||||
|
s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
|
||||||
|
}
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
|
||||||
|
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
|
||||||
|
" %-9s %-11"PRIu64"\n",
|
||||||
|
">", s->cycles.bin[i],
|
||||||
|
">", s->pkts.bin[i],
|
||||||
|
">", s->cycles_per_pkt.bin[i],
|
||||||
|
">", s->pkts_per_batch.bin[i],
|
||||||
|
">", s->max_vhost_qfill.bin[i],
|
||||||
|
">", s->upcalls.bin[i],
|
||||||
|
">", s->cycles_per_upcall.bin[i]);
|
||||||
|
if (s->totals.iterations > 0) {
|
||||||
|
ds_put_cstr(str,
|
||||||
|
"-----------------------------------------------------"
|
||||||
|
"-----------------------------------------------------"
|
||||||
|
"------------------------------------------------\n");
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
|
||||||
|
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
|
||||||
|
"vhost qlen", "upcalls/it", "cycles/upcall");
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-21"PRIu64" %-21.5f %-21"PRIu64
|
||||||
|
" %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
|
||||||
|
s->totals.cycles / s->totals.iterations,
|
||||||
|
1.0 * s->totals.pkts / s->totals.iterations,
|
||||||
|
s->totals.pkts
|
||||||
|
? s->totals.busy_cycles / s->totals.pkts : 0,
|
||||||
|
s->totals.batches
|
||||||
|
? 1.0 * s->totals.pkts / s->totals.batches : 0,
|
||||||
|
1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
|
||||||
|
1.0 * s->totals.upcalls / s->totals.iterations,
|
||||||
|
s->totals.upcalls
|
||||||
|
? s->totals.upcall_cycles / s->totals.upcalls : 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
|
||||||
|
int n_iter)
|
||||||
|
{
|
||||||
|
struct iter_stats *is;
|
||||||
|
size_t index;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (n_iter == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
|
||||||
|
"%-10s %-10s %-10s\n",
|
||||||
|
"iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
|
||||||
|
"vhost qlen", "upcalls", "cycles/upcall");
|
||||||
|
for (i = 1; i <= n_iter; i++) {
|
||||||
|
index = history_sub(s->iterations.idx, i);
|
||||||
|
is = &s->iterations.sample[index];
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
|
||||||
|
" %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
|
||||||
|
" %-11"PRIu32" %-11"PRIu32"\n",
|
||||||
|
is->timestamp,
|
||||||
|
is->cycles,
|
||||||
|
is->pkts,
|
||||||
|
is->pkts ? is->cycles / is->pkts : 0,
|
||||||
|
is->batches ? is->pkts / is->batches : 0,
|
||||||
|
is->max_vhost_qfill,
|
||||||
|
is->upcalls,
|
||||||
|
is->upcalls ? is->upcall_cycles / is->upcalls : 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
|
||||||
|
{
|
||||||
|
struct iter_stats *is;
|
||||||
|
size_t index;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (n_ms == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-12s %-10s %-10s %-10s %-10s"
|
||||||
|
" %-10s %-10s %-10s %-10s\n",
|
||||||
|
"ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
|
||||||
|
"pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
|
||||||
|
for (i = 1; i <= n_ms; i++) {
|
||||||
|
index = history_sub(s->milliseconds.idx, i);
|
||||||
|
is = &s->milliseconds.sample[index];
|
||||||
|
ds_put_format(str,
|
||||||
|
" %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
|
||||||
|
" %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
|
||||||
|
" %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
|
||||||
|
is->timestamp,
|
||||||
|
is->iterations,
|
||||||
|
is->iterations ? is->cycles / is->iterations : 0,
|
||||||
|
is->pkts,
|
||||||
|
is->pkts ? is->busy_cycles / is->pkts : 0,
|
||||||
|
is->batches ? is->pkts / is->batches : 0,
|
||||||
|
is->iterations
|
||||||
|
? is->max_vhost_qfill / is->iterations : 0,
|
||||||
|
is->upcalls,
|
||||||
|
is->upcalls ? is->upcall_cycles / is->upcalls : 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -51,10 +366,152 @@ pmd_perf_read_counters(struct pmd_perf_stats *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This function clears the PMD performance counters from within the PMD
|
||||||
|
* thread or from another thread when the PMD thread is not executing its
|
||||||
|
* poll loop. */
|
||||||
void
|
void
|
||||||
pmd_perf_stats_clear(struct pmd_perf_stats *s)
|
pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
|
||||||
|
OVS_REQUIRES(s->stats_mutex)
|
||||||
{
|
{
|
||||||
|
ovs_mutex_lock(&s->clear_mutex);
|
||||||
for (int i = 0; i < PMD_N_STATS; i++) {
|
for (int i = 0; i < PMD_N_STATS; i++) {
|
||||||
atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
|
atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
|
||||||
}
|
}
|
||||||
|
/* The following stats are only applicable in PMD thread and */
|
||||||
|
memset(&s->current, 0 , sizeof(struct iter_stats));
|
||||||
|
memset(&s->totals, 0 , sizeof(struct iter_stats));
|
||||||
|
histogram_clear(&s->cycles);
|
||||||
|
histogram_clear(&s->pkts);
|
||||||
|
histogram_clear(&s->cycles_per_pkt);
|
||||||
|
histogram_clear(&s->upcalls);
|
||||||
|
histogram_clear(&s->cycles_per_upcall);
|
||||||
|
histogram_clear(&s->pkts_per_batch);
|
||||||
|
histogram_clear(&s->max_vhost_qfill);
|
||||||
|
history_init(&s->iterations);
|
||||||
|
history_init(&s->milliseconds);
|
||||||
|
s->start_ms = time_msec();
|
||||||
|
s->milliseconds.sample[0].timestamp = s->start_ms;
|
||||||
|
/* Clearing finished. */
|
||||||
|
s->clear = false;
|
||||||
|
ovs_mutex_unlock(&s->clear_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This function can be called from the anywhere to clear the stats
|
||||||
|
* of PMD and non-PMD threads. */
|
||||||
|
void
|
||||||
|
pmd_perf_stats_clear(struct pmd_perf_stats *s)
|
||||||
|
{
|
||||||
|
if (ovs_mutex_trylock(&s->stats_mutex) == 0) {
|
||||||
|
/* Locking successful. PMD not polling. */
|
||||||
|
pmd_perf_stats_clear_lock(s);
|
||||||
|
ovs_mutex_unlock(&s->stats_mutex);
|
||||||
|
} else {
|
||||||
|
/* Request the polling PMD to clear the stats. There is no need to
|
||||||
|
* block here as stats retrieval is prevented during clearing. */
|
||||||
|
s->clear = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Functions recording PMD metrics per iteration. */
|
||||||
|
|
||||||
|
inline void
|
||||||
|
pmd_perf_start_iteration(struct pmd_perf_stats *s)
|
||||||
|
OVS_REQUIRES(s->stats_mutex)
|
||||||
|
{
|
||||||
|
if (s->clear) {
|
||||||
|
/* Clear the PMD stats before starting next iteration. */
|
||||||
|
pmd_perf_stats_clear_lock(s);
|
||||||
|
}
|
||||||
|
s->iteration_cnt++;
|
||||||
|
/* Initialize the current interval stats. */
|
||||||
|
memset(&s->current, 0, sizeof(struct iter_stats));
|
||||||
|
if (OVS_LIKELY(s->last_tsc)) {
|
||||||
|
/* We assume here that last_tsc was updated immediately prior at
|
||||||
|
* the end of the previous iteration, or just before the first
|
||||||
|
* iteration. */
|
||||||
|
s->start_tsc = s->last_tsc;
|
||||||
|
} else {
|
||||||
|
/* In case last_tsc has never been set before. */
|
||||||
|
s->start_tsc = cycles_counter_update(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
|
||||||
|
int tx_packets, bool full_metrics)
|
||||||
|
{
|
||||||
|
uint64_t now_tsc = cycles_counter_update(s);
|
||||||
|
struct iter_stats *cum_ms;
|
||||||
|
uint64_t cycles, cycles_per_pkt = 0;
|
||||||
|
|
||||||
|
cycles = now_tsc - s->start_tsc;
|
||||||
|
s->current.timestamp = s->iteration_cnt;
|
||||||
|
s->current.cycles = cycles;
|
||||||
|
s->current.pkts = rx_packets;
|
||||||
|
|
||||||
|
if (rx_packets + tx_packets > 0) {
|
||||||
|
pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
|
||||||
|
} else {
|
||||||
|
pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
|
||||||
|
}
|
||||||
|
/* Add iteration samples to histograms. */
|
||||||
|
histogram_add_sample(&s->cycles, cycles);
|
||||||
|
histogram_add_sample(&s->pkts, rx_packets);
|
||||||
|
|
||||||
|
if (!full_metrics) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;
|
||||||
|
|
||||||
|
if (rx_packets > 0) {
|
||||||
|
cycles_per_pkt = cycles / rx_packets;
|
||||||
|
histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
|
||||||
|
}
|
||||||
|
if (s->current.batches > 0) {
|
||||||
|
histogram_add_sample(&s->pkts_per_batch,
|
||||||
|
rx_packets / s->current.batches);
|
||||||
|
}
|
||||||
|
histogram_add_sample(&s->upcalls, s->current.upcalls);
|
||||||
|
if (s->current.upcalls > 0) {
|
||||||
|
histogram_add_sample(&s->cycles_per_upcall,
|
||||||
|
s->current.upcall_cycles / s->current.upcalls);
|
||||||
|
}
|
||||||
|
histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
|
||||||
|
|
||||||
|
/* Add iteration samples to millisecond stats. */
|
||||||
|
cum_ms = history_current(&s->milliseconds);
|
||||||
|
cum_ms->iterations++;
|
||||||
|
cum_ms->cycles += cycles;
|
||||||
|
if (rx_packets > 0) {
|
||||||
|
cum_ms->busy_cycles += cycles;
|
||||||
|
}
|
||||||
|
cum_ms->pkts += s->current.pkts;
|
||||||
|
cum_ms->upcalls += s->current.upcalls;
|
||||||
|
cum_ms->upcall_cycles += s->current.upcall_cycles;
|
||||||
|
cum_ms->batches += s->current.batches;
|
||||||
|
cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
|
||||||
|
|
||||||
|
/* Store in iteration history. This advances the iteration idx and
|
||||||
|
* clears the next slot in the iteration history. */
|
||||||
|
history_store(&s->iterations, &s->current);
|
||||||
|
if (now_tsc > s->next_check_tsc) {
|
||||||
|
/* Check if ms is completed and store in milliseconds history. */
|
||||||
|
uint64_t now = time_msec();
|
||||||
|
if (now != cum_ms->timestamp) {
|
||||||
|
/* Add ms stats to totals. */
|
||||||
|
s->totals.iterations += cum_ms->iterations;
|
||||||
|
s->totals.cycles += cum_ms->cycles;
|
||||||
|
s->totals.busy_cycles += cum_ms->busy_cycles;
|
||||||
|
s->totals.pkts += cum_ms->pkts;
|
||||||
|
s->totals.upcalls += cum_ms->upcalls;
|
||||||
|
s->totals.upcall_cycles += cum_ms->upcall_cycles;
|
||||||
|
s->totals.batches += cum_ms->batches;
|
||||||
|
s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
|
||||||
|
cum_ms = history_next(&s->milliseconds);
|
||||||
|
cum_ms->timestamp = now;
|
||||||
|
}
|
||||||
|
/* Do the next check after 10K cycles (4 us at 2.5 GHz TSC clock). */
|
||||||
|
s->next_check_tsc = cycles_counter_update(s) + 10000;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -38,10 +38,18 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This module encapsulates data structures and functions to maintain PMD
|
/* This module encapsulates data structures and functions to maintain basic PMD
|
||||||
* performance metrics such as packet counters, execution cycles. It
|
* performance metrics such as packet counters, execution cycles as well as
|
||||||
* provides a clean API for dpif-netdev to initialize, update and read and
|
* histograms and time series recording for more detailed PMD metrics.
|
||||||
|
*
|
||||||
|
* It provides a clean API for dpif-netdev to initialize, update, read and
|
||||||
* reset these metrics.
|
* reset these metrics.
|
||||||
|
*
|
||||||
|
* The basic set of PMD counters is implemented as atomic_uint64_t variables
|
||||||
|
* to guarantee correct read also in 32-bit systems.
|
||||||
|
*
|
||||||
|
* The detailed PMD performance metrics are only supported on 64-bit systems
|
||||||
|
* with atomic 64-bit read and store semantics for plain uint64_t counters.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Set of counter types maintained in pmd_perf_stats. */
|
/* Set of counter types maintained in pmd_perf_stats. */
|
||||||
@@ -66,6 +74,7 @@ enum pmd_stat_type {
|
|||||||
PMD_STAT_SENT_BATCHES, /* Number of batches sent. */
|
PMD_STAT_SENT_BATCHES, /* Number of batches sent. */
|
||||||
PMD_CYCLES_ITER_IDLE, /* Cycles spent in idle iterations. */
|
PMD_CYCLES_ITER_IDLE, /* Cycles spent in idle iterations. */
|
||||||
PMD_CYCLES_ITER_BUSY, /* Cycles spent in busy iterations. */
|
PMD_CYCLES_ITER_BUSY, /* Cycles spent in busy iterations. */
|
||||||
|
PMD_CYCLES_UPCALL, /* Cycles spent processing upcalls. */
|
||||||
PMD_N_STATS
|
PMD_N_STATS
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -81,18 +90,91 @@ struct pmd_counters {
|
|||||||
uint64_t zero[PMD_N_STATS]; /* Value at last _clear(). */
|
uint64_t zero[PMD_N_STATS]; /* Value at last _clear(). */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Container for all performance metrics of a PMD.
|
/* Data structure to collect statistical distribution of an integer measurement
|
||||||
* Part of the struct dp_netdev_pmd_thread. */
|
* type in form of a histogram. The wall[] array contains the inclusive
|
||||||
|
* upper boundaries of the bins, while the bin[] array contains the actual
|
||||||
|
* counters per bin. The histogram walls are typically set automatically
|
||||||
|
* using the functions provided below.*/
|
||||||
|
|
||||||
|
#define NUM_BINS 32 /* Number of histogram bins. */
|
||||||
|
|
||||||
|
struct histogram {
|
||||||
|
uint32_t wall[NUM_BINS];
|
||||||
|
uint64_t bin[NUM_BINS];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Data structure to record detailed PMD execution metrics per iteration for
|
||||||
|
* a history period of up to HISTORY_LEN iterations in circular buffer.
|
||||||
|
* Also used to record up to HISTORY_LEN millisecond averages/totals of these
|
||||||
|
* metrics.*/
|
||||||
|
|
||||||
|
struct iter_stats {
|
||||||
|
uint64_t timestamp; /* Iteration no. or millisecond. */
|
||||||
|
uint64_t cycles; /* Number of TSC cycles spent in it. or ms. */
|
||||||
|
uint64_t busy_cycles; /* Cycles spent in busy iterations or ms. */
|
||||||
|
uint32_t iterations; /* Iterations in ms. */
|
||||||
|
uint32_t pkts; /* Packets processed in iteration or ms. */
|
||||||
|
uint32_t upcalls; /* Number of upcalls in iteration or ms. */
|
||||||
|
uint32_t upcall_cycles; /* Cycles spent in upcalls in it. or ms. */
|
||||||
|
uint32_t batches; /* Number of rx batches in iteration or ms. */
|
||||||
|
uint32_t max_vhost_qfill; /* Maximum fill level in iteration or ms. */
|
||||||
|
};
|
||||||
|
|
||||||
|
#define HISTORY_LEN 1000 /* Length of recorded history
|
||||||
|
(iterations and ms). */
|
||||||
|
#define DEF_HIST_SHOW 20 /* Default number of history samples to
|
||||||
|
display. */
|
||||||
|
|
||||||
|
struct history {
|
||||||
|
size_t idx; /* Slot to which next call to history_store()
|
||||||
|
will write. */
|
||||||
|
struct iter_stats sample[HISTORY_LEN];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Container for all performance metrics of a PMD within the struct
|
||||||
|
* dp_netdev_pmd_thread. The metrics must be updated from within the PMD
|
||||||
|
* thread but can be read from any thread. The basic PMD counters in
|
||||||
|
* struct pmd_counters can be read without protection against concurrent
|
||||||
|
* clearing. The other metrics may only be safely read with the clear_mutex
|
||||||
|
* held to protect against concurrent clearing. */
|
||||||
|
|
||||||
struct pmd_perf_stats {
|
struct pmd_perf_stats {
|
||||||
/* Start of the current PMD iteration in TSC cycles.*/
|
/* Prevents interference between PMD polling and stats clearing. */
|
||||||
uint64_t start_it_tsc;
|
struct ovs_mutex stats_mutex;
|
||||||
|
/* Set by CLI thread to order clearing of PMD stats. */
|
||||||
|
volatile bool clear;
|
||||||
|
/* Prevents stats retrieval while clearing is in progress. */
|
||||||
|
struct ovs_mutex clear_mutex;
|
||||||
|
/* Start of the current performance measurement period. */
|
||||||
|
uint64_t start_ms;
|
||||||
|
/* Counter for PMD iterations. */
|
||||||
|
uint64_t iteration_cnt;
|
||||||
|
/* Start of the current iteration. */
|
||||||
|
uint64_t start_tsc;
|
||||||
/* Latest TSC time stamp taken in PMD. */
|
/* Latest TSC time stamp taken in PMD. */
|
||||||
uint64_t last_tsc;
|
uint64_t last_tsc;
|
||||||
|
/* Used to space certain checks in time. */
|
||||||
|
uint64_t next_check_tsc;
|
||||||
/* If non-NULL, outermost cycle timer currently running in PMD. */
|
/* If non-NULL, outermost cycle timer currently running in PMD. */
|
||||||
struct cycle_timer *cur_timer;
|
struct cycle_timer *cur_timer;
|
||||||
/* Set of PMD counters with their zero offsets. */
|
/* Set of PMD counters with their zero offsets. */
|
||||||
struct pmd_counters counters;
|
struct pmd_counters counters;
|
||||||
|
/* Statistics of the current iteration. */
|
||||||
|
struct iter_stats current;
|
||||||
|
/* Totals over all completed milliseconds since the last clear. */
|
||||||
|
struct iter_stats totals;
|
||||||
|
/* Histograms for the PMD metrics. */
|
||||||
|
struct histogram cycles;
|
||||||
|
struct histogram pkts;
|
||||||
|
struct histogram cycles_per_pkt;
|
||||||
|
struct histogram upcalls;
|
||||||
|
struct histogram cycles_per_upcall;
|
||||||
|
struct histogram pkts_per_batch;
|
||||||
|
struct histogram max_vhost_qfill;
|
||||||
|
/* Iteration history buffer. */
|
||||||
|
struct history iterations;
|
||||||
|
/* Millisecond history buffer. */
|
||||||
|
struct history milliseconds;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Support for accurate timing of PMD execution on TSC clock cycle level.
|
/* Support for accurate timing of PMD execution on TSC clock cycle level.
|
||||||
@@ -175,8 +257,14 @@ cycle_timer_stop(struct pmd_perf_stats *s,
|
|||||||
return now - timer->start;
|
return now - timer->start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Functions to initialize and reset the PMD performance metrics. */
|
||||||
|
|
||||||
void pmd_perf_stats_init(struct pmd_perf_stats *s);
|
void pmd_perf_stats_init(struct pmd_perf_stats *s);
|
||||||
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
|
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
|
||||||
|
void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);
|
||||||
|
|
||||||
|
/* Functions to read and update PMD counters. */
|
||||||
|
|
||||||
void pmd_perf_read_counters(struct pmd_perf_stats *s,
|
void pmd_perf_read_counters(struct pmd_perf_stats *s,
|
||||||
uint64_t stats[PMD_N_STATS]);
|
uint64_t stats[PMD_N_STATS]);
|
||||||
|
|
||||||
@@ -199,32 +287,95 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
|
|||||||
atomic_store_relaxed(&s->counters.n[counter], tmp);
|
atomic_store_relaxed(&s->counters.n[counter], tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
/* Functions to manipulate a sample history. */
|
||||||
pmd_perf_start_iteration(struct pmd_perf_stats *s)
|
|
||||||
{
|
|
||||||
if (OVS_LIKELY(s->last_tsc)) {
|
|
||||||
/* We assume here that last_tsc was updated immediately prior at
|
|
||||||
* the end of the previous iteration, or just before the first
|
|
||||||
* iteration. */
|
|
||||||
s->start_it_tsc = s->last_tsc;
|
|
||||||
} else {
|
|
||||||
/* In case last_tsc has never been set before. */
|
|
||||||
s->start_it_tsc = cycles_counter_update(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets)
|
histogram_add_sample(struct histogram *hist, uint32_t val)
|
||||||
{
|
{
|
||||||
uint64_t cycles = cycles_counter_update(s) - s->start_it_tsc;
|
/* TODO: Can do better with binary search? */
|
||||||
|
for (int i = 0; i < NUM_BINS-1; i++) {
|
||||||
if (rx_packets > 0) {
|
if (val <= hist->wall[i]) {
|
||||||
pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
|
hist->bin[i]++;
|
||||||
} else {
|
return;
|
||||||
pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
|
}
|
||||||
}
|
}
|
||||||
|
hist->bin[NUM_BINS-1]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t histogram_samples(const struct histogram *hist);
|
||||||
|
|
||||||
|
/* This function is used to advance the given history index by positive
|
||||||
|
* offset in the circular history buffer. */
|
||||||
|
static inline uint32_t
|
||||||
|
history_add(uint32_t idx, uint32_t offset)
|
||||||
|
{
|
||||||
|
return (idx + offset) % HISTORY_LEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This function computes the difference between two indices into the
|
||||||
|
* circular history buffer. The result is always positive in the range
|
||||||
|
* 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
|
||||||
|
* starting from idx2. It can also be used to retreat the history index
|
||||||
|
* idx1 by idx2 steps. */
|
||||||
|
static inline uint32_t
|
||||||
|
history_sub(uint32_t idx1, uint32_t idx2)
|
||||||
|
{
|
||||||
|
return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct iter_stats *
|
||||||
|
history_current(struct history *h)
|
||||||
|
{
|
||||||
|
return &h->sample[h->idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct iter_stats *
|
||||||
|
history_next(struct history *h)
|
||||||
|
{
|
||||||
|
size_t next_idx = history_add(h->idx, 1);
|
||||||
|
struct iter_stats *next = &h->sample[next_idx];
|
||||||
|
|
||||||
|
memset(next, 0, sizeof(*next));
|
||||||
|
h->idx = next_idx;
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct iter_stats *
|
||||||
|
history_store(struct history *h, struct iter_stats *is)
|
||||||
|
{
|
||||||
|
if (is) {
|
||||||
|
h->sample[h->idx] = *is;
|
||||||
|
}
|
||||||
|
/* Advance the history pointer */
|
||||||
|
return history_next(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Functions recording PMD metrics per iteration. */
|
||||||
|
|
||||||
|
void
|
||||||
|
pmd_perf_start_iteration(struct pmd_perf_stats *s);
|
||||||
|
void
|
||||||
|
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
|
||||||
|
int tx_packets, bool full_metrics);
|
||||||
|
|
||||||
|
/* Formatting the output of commands. */
|
||||||
|
|
||||||
|
/* Parameter set that pmd_perf_show_cmd() fills in and passes as the 'aux'
 * argument to dpif_netdev_pmd_info(); pmd_info_show_perf() reads it to decide
 * which parts of the report to print. */
struct pmd_perf_params {
|
||||||
|
/* Command selector; pmd_perf_show_cmd() sets it to PMD_INFO_PERF_SHOW. */
int command_type;
|
||||||
|
/* Print the histograms.  True unless the "-nh" option was given. */
bool histograms;
|
||||||
|
/* Number of iteration history entries to print ("-it"); 0 prints none. */
size_t iter_hist_len;
|
||||||
|
/* Number of millisecond history entries to print ("-ms"); 0 prints none. */
size_t ms_hist_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
|
||||||
|
double duration);
|
||||||
|
void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
|
||||||
|
void pmd_perf_format_iteration_history(struct ds *str,
|
||||||
|
struct pmd_perf_stats *s,
|
||||||
|
int n_iter);
|
||||||
|
void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
|
||||||
|
int n_ms);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
157
lib/dpif-netdev-unixctl.man
Normal file
157
lib/dpif-netdev-unixctl.man
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
.SS "DPIF-NETDEV COMMANDS"
|
||||||
|
These commands are used to expose internal information (mostly statistics)
|
||||||
|
about the "dpif-netdev" userspace datapath. If there is only one datapath
|
||||||
|
(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
|
||||||
|
argument can be omitted. By default the commands present data for all pmd
|
||||||
|
threads in the datapath. By specifying the "-pmd Core" option one can filter
|
||||||
|
the output for a single pmd in the datapath.
|
||||||
|
.
|
||||||
|
.IP "\fBdpif-netdev/pmd-stats-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
|
||||||
|
Shows performance statistics for one or all pmd threads of the datapath
|
||||||
|
\fIdp\fR. The special thread "main" sums up the statistics of every non pmd
|
||||||
|
thread.
|
||||||
|
|
||||||
|
The sum of "emc hits", "masked hits" and "miss" is the number of
|
||||||
|
packet lookups performed by the datapath. Beware that a recirculated packet
|
||||||
|
experiences one additional lookup per recirculation, so there may be
|
||||||
|
more lookups than forwarded packets in the datapath.
|
||||||
|
|
||||||
|
Cycles are counted using the TSC or similar facilities (when available on
|
||||||
|
the platform). The duration of one cycle depends on the processing platform.
|
||||||
|
|
||||||
|
"idle cycles" refers to cycles spent in PMD iterations not forwarding any
|
||||||
|
packets. "processing cycles" refers to cycles spent in PMD iterations
|
||||||
|
forwarding at least one packet, including the cost for polling, processing and
|
||||||
|
transmitting said packets.
|
||||||
|
|
||||||
|
To reset these counters use \fBdpif-netdev/pmd-stats-clear\fR.
|
||||||
|
.
|
||||||
|
.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
|
||||||
|
Resets to zero the per pmd thread performance numbers shown by the
|
||||||
|
\fBdpif-netdev/pmd-stats-show\fR and \fBdpif-netdev/pmd-perf-show\fR commands.
|
||||||
|
It will NOT reset datapath or bridge statistics, only the values shown by
|
||||||
|
the above commands.
|
||||||
|
.
|
||||||
|
.IP "\fBdpif-netdev/pmd-perf-show\fR [\fB-nh\fR] [\fB-it\fR \fIiter_len\fR] \
|
||||||
|
[\fB-ms\fR \fIms_len\fR] [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
|
||||||
|
Shows detailed performance metrics for one or all pmd threads of the
|
||||||
|
user space datapath.
|
||||||
|
|
||||||
|
The collection of detailed statistics can be controlled by a new
|
||||||
|
configuration parameter "other_config:pmd-perf-metrics". By default it
|
||||||
|
is disabled. The run-time overhead, when enabled, is in the order of 1%.
|
||||||
|
|
||||||
|
.RS
|
||||||
|
.IP
|
||||||
|
.PD .4v
|
||||||
|
.IP \(em
|
||||||
|
used cycles
|
||||||
|
.IP \(em
|
||||||
|
forwarded packets
|
||||||
|
.IP \(em
|
||||||
|
number of rx batches
|
||||||
|
.IP \(em
|
||||||
|
packets/rx batch
|
||||||
|
.IP \(em
|
||||||
|
max. vhostuser queue fill level
|
||||||
|
.IP \(em
|
||||||
|
number of upcalls
|
||||||
|
.IP \(em
|
||||||
|
cycles spent in upcalls
|
||||||
|
.PD
|
||||||
|
.RE
|
||||||
|
.IP
|
||||||
|
This raw recorded data is used threefold:
|
||||||
|
|
||||||
|
.RS
|
||||||
|
.IP
|
||||||
|
.PD .4v
|
||||||
|
.IP 1.
|
||||||
|
In histograms for each of the following metrics:
|
||||||
|
.RS
|
||||||
|
.IP \(em
|
||||||
|
cycles/iteration (logarithmic)
|
||||||
|
.IP \(em
|
||||||
|
packets/iteration (logarithmic)
|
||||||
|
.IP \(em
|
||||||
|
cycles/packet
|
||||||
|
.IP \(em
|
||||||
|
packets/batch
|
||||||
|
.IP \(em
|
||||||
|
max. vhostuser qlen (logarithmic)
|
||||||
|
.IP \(em
|
||||||
|
upcalls
|
||||||
|
.IP \(em
|
||||||
|
cycles/upcall (logarithmic)
|
||||||
|
The histogram bins are divided linearly or logarithmically.
|
||||||
|
.RE
|
||||||
|
.IP 2.
|
||||||
|
A cyclic history of the above metrics for 1024 iterations
|
||||||
|
.IP 3.
|
||||||
|
A cyclic history of the cumulative/average values per millisecond wall
|
||||||
|
clock for the last 1024 milliseconds:
|
||||||
|
.RS
|
||||||
|
.IP \(em
|
||||||
|
number of iterations
|
||||||
|
.IP \(em
|
||||||
|
avg. cycles/iteration
|
||||||
|
.IP \(em
|
||||||
|
packets (Kpps)
|
||||||
|
.IP \(em
|
||||||
|
avg. packets/batch
|
||||||
|
.IP \(em
|
||||||
|
avg. max vhost qlen
|
||||||
|
.IP \(em
|
||||||
|
upcalls
|
||||||
|
.IP \(em
|
||||||
|
avg. cycles/upcall
|
||||||
|
.RE
|
||||||
|
.PD
|
||||||
|
.RE
|
||||||
|
.IP
|
||||||
|
.
|
||||||
|
The command options are:
|
||||||
|
.RS
|
||||||
|
.IP "\fB-nh\fR"
|
||||||
|
Suppress the histograms
|
||||||
|
.IP "\fB-it\fR \fIiter_len\fR"
|
||||||
|
Display the last iter_len iteration stats
|
||||||
|
.IP "\fB-ms\fR \fIms_len\fR"
|
||||||
|
Display the last ms_len millisecond stats
|
||||||
|
.RE
|
||||||
|
.IP
|
||||||
|
The output always contains the following global PMD statistics:
|
||||||
|
.RS
|
||||||
|
.IP
|
||||||
|
Time: 15:24:55.270 .br
|
||||||
|
Measurement duration: 1.008 s
|
||||||
|
|
||||||
|
pmd thread numa_id 0 core_id 1:
|
||||||
|
|
||||||
|
Cycles: 2419034712 (2.40 GHz)
|
||||||
|
Iterations: 572817 (1.76 us/it)
|
||||||
|
- idle: 486808 (15.9 % cycles)
|
||||||
|
- busy: 86009 (84.1 % cycles)
|
||||||
|
Rx packets: 2399607 (2381 Kpps, 848 cycles/pkt)
|
||||||
|
Datapath passes: 3599415 (1.50 passes/pkt)
|
||||||
|
- EMC hits: 336472 ( 9.3 %)
|
||||||
|
- Megaflow hits: 3262943 (90.7 %, 1.00 subtbl lookups/hit)
|
||||||
|
- Upcalls: 0 ( 0.0 %, 0.0 us/upcall)
|
||||||
|
- Lost upcalls: 0 ( 0.0 %)
|
||||||
|
Tx packets: 2399607 (2381 Kpps)
|
||||||
|
Tx batches: 171400 (14.00 pkts/batch)
|
||||||
|
.RE
|
||||||
|
.IP
|
||||||
|
Here "Rx packets" actually reflects the number of packets forwarded by the
|
||||||
|
datapath. "Datapath passes" matches the number of packet lookups as
|
||||||
|
reported by the \fBdpif-netdev/pmd-stats-show\fR command.
|
||||||
|
|
||||||
|
To reset the counters and start a new measurement use
|
||||||
|
\fBdpif-netdev/pmd-stats-clear\fR.
|
||||||
|
.
|
||||||
|
.IP "\fBdpif-netdev/pmd-rxq-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
|
||||||
|
For one or all pmd threads of the datapath \fIdp\fR show the list of queue-ids
|
||||||
|
with port names, which this thread polls.
|
||||||
|
.
|
||||||
|
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
|
||||||
|
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
|
@@ -49,6 +49,7 @@
|
|||||||
#include "id-pool.h"
|
#include "id-pool.h"
|
||||||
#include "latch.h"
|
#include "latch.h"
|
||||||
#include "netdev.h"
|
#include "netdev.h"
|
||||||
|
#include "netdev-provider.h"
|
||||||
#include "netdev-vport.h"
|
#include "netdev-vport.h"
|
||||||
#include "netlink.h"
|
#include "netlink.h"
|
||||||
#include "odp-execute.h"
|
#include "odp-execute.h"
|
||||||
@@ -281,6 +282,8 @@ struct dp_netdev {
|
|||||||
|
|
||||||
/* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
|
/* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
|
||||||
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
|
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
|
||||||
|
/* Enable collection of PMD performance metrics. */
|
||||||
|
atomic_bool pmd_perf_metrics;
|
||||||
|
|
||||||
/* Protects access to ofproto-dpif-upcall interface during revalidator
|
/* Protects access to ofproto-dpif-upcall interface during revalidator
|
||||||
* thread synchronization. */
|
* thread synchronization. */
|
||||||
@@ -356,6 +359,7 @@ struct dp_netdev_rxq {
|
|||||||
particular core. */
|
particular core. */
|
||||||
unsigned intrvl_idx; /* Write index for 'cycles_intrvl'. */
|
unsigned intrvl_idx; /* Write index for 'cycles_intrvl'. */
|
||||||
struct dp_netdev_pmd_thread *pmd; /* pmd thread that polls this queue. */
|
struct dp_netdev_pmd_thread *pmd; /* pmd thread that polls this queue. */
|
||||||
|
bool is_vhost; /* Is rxq of a vhost port. */
|
||||||
|
|
||||||
/* Counters of cycles spent successfully polling and processing pkts. */
|
/* Counters of cycles spent successfully polling and processing pkts. */
|
||||||
atomic_ullong cycles[RXQ_N_CYCLES];
|
atomic_ullong cycles[RXQ_N_CYCLES];
|
||||||
@@ -717,6 +721,8 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
|
|||||||
static void emc_clear_entry(struct emc_entry *ce);
|
static void emc_clear_entry(struct emc_entry *ce);
|
||||||
|
|
||||||
static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
|
static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
|
||||||
|
static inline bool
|
||||||
|
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
emc_cache_init(struct emc_cache *flow_cache)
|
emc_cache_init(struct emc_cache *flow_cache)
|
||||||
@@ -800,7 +806,8 @@ get_dp_netdev(const struct dpif *dpif)
|
|||||||
enum pmd_info_type {
|
enum pmd_info_type {
|
||||||
PMD_INFO_SHOW_STATS, /* Show how cpu cycles are spent. */
|
PMD_INFO_SHOW_STATS, /* Show how cpu cycles are spent. */
|
||||||
PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
|
PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
|
||||||
PMD_INFO_SHOW_RXQ /* Show poll-lists of pmd threads. */
|
PMD_INFO_SHOW_RXQ, /* Show poll lists of pmd threads. */
|
||||||
|
PMD_INFO_PERF_SHOW, /* Show pmd performance details. */
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -891,6 +898,47 @@ pmd_info_show_stats(struct ds *reply,
|
|||||||
stats[PMD_CYCLES_ITER_BUSY], total_packets);
|
stats[PMD_CYCLES_ITER_BUSY], total_packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmd_info_show_perf(struct ds *reply,
|
||||||
|
struct dp_netdev_pmd_thread *pmd,
|
||||||
|
struct pmd_perf_params *par)
|
||||||
|
{
|
||||||
|
if (pmd->core_id != NON_PMD_CORE_ID) {
|
||||||
|
char *time_str =
|
||||||
|
xastrftime_msec("%H:%M:%S.###", time_wall_msec(), true);
|
||||||
|
long long now = time_msec();
|
||||||
|
double duration = (now - pmd->perf_stats.start_ms) / 1000.0;
|
||||||
|
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
ds_put_format(reply, "Time: %s\n", time_str);
|
||||||
|
ds_put_format(reply, "Measurement duration: %.3f s\n", duration);
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
format_pmd_thread(reply, pmd);
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
pmd_perf_format_overall_stats(reply, &pmd->perf_stats, duration);
|
||||||
|
if (pmd_perf_metrics_enabled(pmd)) {
|
||||||
|
/* Prevent parallel clearing of perf metrics. */
|
||||||
|
ovs_mutex_lock(&pmd->perf_stats.clear_mutex);
|
||||||
|
if (par->histograms) {
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
pmd_perf_format_histograms(reply, &pmd->perf_stats);
|
||||||
|
}
|
||||||
|
if (par->iter_hist_len > 0) {
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
pmd_perf_format_iteration_history(reply, &pmd->perf_stats,
|
||||||
|
par->iter_hist_len);
|
||||||
|
}
|
||||||
|
if (par->ms_hist_len > 0) {
|
||||||
|
ds_put_cstr(reply, "\n");
|
||||||
|
pmd_perf_format_ms_history(reply, &pmd->perf_stats,
|
||||||
|
par->ms_hist_len);
|
||||||
|
}
|
||||||
|
ovs_mutex_unlock(&pmd->perf_stats.clear_mutex);
|
||||||
|
}
|
||||||
|
free(time_str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compare_poll_list(const void *a_, const void *b_)
|
compare_poll_list(const void *a_, const void *b_)
|
||||||
{
|
{
|
||||||
@@ -1068,7 +1116,7 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
|
|||||||
ovs_mutex_lock(&dp_netdev_mutex);
|
ovs_mutex_lock(&dp_netdev_mutex);
|
||||||
|
|
||||||
while (argc > 1) {
|
while (argc > 1) {
|
||||||
if (!strcmp(argv[1], "-pmd") && argc >= 3) {
|
if (!strcmp(argv[1], "-pmd") && argc > 2) {
|
||||||
if (str_to_uint(argv[2], 10, &core_id)) {
|
if (str_to_uint(argv[2], 10, &core_id)) {
|
||||||
filter_on_pmd = true;
|
filter_on_pmd = true;
|
||||||
}
|
}
|
||||||
@@ -1108,6 +1156,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
|
|||||||
pmd_perf_stats_clear(&pmd->perf_stats);
|
pmd_perf_stats_clear(&pmd->perf_stats);
|
||||||
} else if (type == PMD_INFO_SHOW_STATS) {
|
} else if (type == PMD_INFO_SHOW_STATS) {
|
||||||
pmd_info_show_stats(&reply, pmd);
|
pmd_info_show_stats(&reply, pmd);
|
||||||
|
} else if (type == PMD_INFO_PERF_SHOW) {
|
||||||
|
pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(pmd_list);
|
free(pmd_list);
|
||||||
@@ -1117,6 +1167,48 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
|
|||||||
unixctl_command_reply(conn, ds_cstr(&reply));
|
unixctl_command_reply(conn, ds_cstr(&reply));
|
||||||
ds_destroy(&reply);
|
ds_destroy(&reply);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
|
||||||
|
const char *argv[],
|
||||||
|
void *aux OVS_UNUSED)
|
||||||
|
{
|
||||||
|
struct pmd_perf_params par;
|
||||||
|
long int it_hist = 0, ms_hist = 0;
|
||||||
|
par.histograms = true;
|
||||||
|
|
||||||
|
while (argc > 1) {
|
||||||
|
if (!strcmp(argv[1], "-nh")) {
|
||||||
|
par.histograms = false;
|
||||||
|
argc -= 1;
|
||||||
|
argv += 1;
|
||||||
|
} else if (!strcmp(argv[1], "-it") && argc > 2) {
|
||||||
|
it_hist = strtol(argv[2], NULL, 10);
|
||||||
|
if (it_hist < 0) {
|
||||||
|
it_hist = 0;
|
||||||
|
} else if (it_hist > HISTORY_LEN) {
|
||||||
|
it_hist = HISTORY_LEN;
|
||||||
|
}
|
||||||
|
argc -= 2;
|
||||||
|
argv += 2;
|
||||||
|
} else if (!strcmp(argv[1], "-ms") && argc > 2) {
|
||||||
|
ms_hist = strtol(argv[2], NULL, 10);
|
||||||
|
if (ms_hist < 0) {
|
||||||
|
ms_hist = 0;
|
||||||
|
} else if (ms_hist > HISTORY_LEN) {
|
||||||
|
ms_hist = HISTORY_LEN;
|
||||||
|
}
|
||||||
|
argc -= 2;
|
||||||
|
argv += 2;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
par.iter_hist_len = it_hist;
|
||||||
|
par.ms_hist_len = ms_hist;
|
||||||
|
par.command_type = PMD_INFO_PERF_SHOW;
|
||||||
|
dpif_netdev_pmd_info(conn, argc, argv, &par);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dpif_netdev_init(void)
|
dpif_netdev_init(void)
|
||||||
@@ -1134,6 +1226,12 @@ dpif_netdev_init(void)
|
|||||||
unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core] [dp]",
|
unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core] [dp]",
|
||||||
0, 3, dpif_netdev_pmd_info,
|
0, 3, dpif_netdev_pmd_info,
|
||||||
(void *)&poll_aux);
|
(void *)&poll_aux);
|
||||||
|
unixctl_command_register("dpif-netdev/pmd-perf-show",
|
||||||
|
"[-nh] [-it iter-history-len]"
|
||||||
|
" [-ms ms-history-len]"
|
||||||
|
" [-pmd core] [dp]",
|
||||||
|
0, 8, pmd_perf_show_cmd,
|
||||||
|
NULL);
|
||||||
unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
|
unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
|
||||||
0, 1, dpif_netdev_pmd_rebalance,
|
0, 1, dpif_netdev_pmd_rebalance,
|
||||||
NULL);
|
NULL);
|
||||||
@@ -3021,6 +3119,18 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool perf_enabled = smap_get_bool(other_config, "pmd-perf-metrics", false);
|
||||||
|
bool cur_perf_enabled;
|
||||||
|
atomic_read_relaxed(&dp->pmd_perf_metrics, &cur_perf_enabled);
|
||||||
|
if (perf_enabled != cur_perf_enabled) {
|
||||||
|
atomic_store_relaxed(&dp->pmd_perf_metrics, perf_enabled);
|
||||||
|
if (perf_enabled) {
|
||||||
|
VLOG_INFO("PMD performance metrics collection enabled");
|
||||||
|
} else {
|
||||||
|
VLOG_INFO("PMD performance metrics collection disabled");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3190,6 +3300,25 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
|
|||||||
return processing_cycles;
|
return processing_cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ATOMIC_ALWAYS_LOCK_FREE_8B
|
||||||
|
static inline bool
|
||||||
|
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd)
|
||||||
|
{
|
||||||
|
bool pmd_perf_enabled;
|
||||||
|
atomic_read_relaxed(&pmd->dp->pmd_perf_metrics, &pmd_perf_enabled);
|
||||||
|
return pmd_perf_enabled;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* If stores and reads of 64-bit integers are not atomic, the full PMD
|
||||||
|
* performance metrics are not available as locked access to 64 bit
|
||||||
|
* integers would be prohibitively expensive. */
|
||||||
|
static inline bool
|
||||||
|
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
||||||
struct tx_port *p)
|
struct tx_port *p)
|
||||||
@@ -3265,10 +3394,12 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
|||||||
struct dp_netdev_rxq *rxq,
|
struct dp_netdev_rxq *rxq,
|
||||||
odp_port_t port_no)
|
odp_port_t port_no)
|
||||||
{
|
{
|
||||||
|
struct pmd_perf_stats *s = &pmd->perf_stats;
|
||||||
struct dp_packet_batch batch;
|
struct dp_packet_batch batch;
|
||||||
struct cycle_timer timer;
|
struct cycle_timer timer;
|
||||||
int error;
|
int error;
|
||||||
int batch_cnt = 0, output_cnt = 0;
|
int batch_cnt = 0;
|
||||||
|
int rem_qlen = 0, *qlen_p = NULL;
|
||||||
uint64_t cycles;
|
uint64_t cycles;
|
||||||
|
|
||||||
/* Measure duration for polling and processing rx burst. */
|
/* Measure duration for polling and processing rx burst. */
|
||||||
@@ -3277,20 +3408,37 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
|||||||
pmd->ctx.last_rxq = rxq;
|
pmd->ctx.last_rxq = rxq;
|
||||||
dp_packet_batch_init(&batch);
|
dp_packet_batch_init(&batch);
|
||||||
|
|
||||||
error = netdev_rxq_recv(rxq->rx, &batch, NULL);
|
/* Fetch the rx queue length only for vhostuser ports. */
|
||||||
|
if (pmd_perf_metrics_enabled(pmd) && rxq->is_vhost) {
|
||||||
|
qlen_p = &rem_qlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = netdev_rxq_recv(rxq->rx, &batch, qlen_p);
|
||||||
if (!error) {
|
if (!error) {
|
||||||
/* At least one packet received. */
|
/* At least one packet received. */
|
||||||
*recirc_depth_get() = 0;
|
*recirc_depth_get() = 0;
|
||||||
pmd_thread_ctx_time_update(pmd);
|
pmd_thread_ctx_time_update(pmd);
|
||||||
|
|
||||||
batch_cnt = batch.count;
|
batch_cnt = batch.count;
|
||||||
|
if (pmd_perf_metrics_enabled(pmd)) {
|
||||||
|
/* Update batch histogram. */
|
||||||
|
s->current.batches++;
|
||||||
|
histogram_add_sample(&s->pkts_per_batch, batch_cnt);
|
||||||
|
/* Update the maximum vhost rx queue fill level. */
|
||||||
|
if (rxq->is_vhost && rem_qlen >= 0) {
|
||||||
|
uint32_t qfill = batch_cnt + rem_qlen;
|
||||||
|
if (qfill > s->current.max_vhost_qfill) {
|
||||||
|
s->current.max_vhost_qfill = qfill;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Process packet batch. */
|
||||||
dp_netdev_input(pmd, &batch, port_no);
|
dp_netdev_input(pmd, &batch, port_no);
|
||||||
|
|
||||||
/* Assign processing cycles to rx queue. */
|
/* Assign processing cycles to rx queue. */
|
||||||
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
|
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
|
||||||
dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
|
dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
|
||||||
|
|
||||||
output_cnt = dp_netdev_pmd_flush_output_packets(pmd, false);
|
dp_netdev_pmd_flush_output_packets(pmd, false);
|
||||||
} else {
|
} else {
|
||||||
/* Discard cycles. */
|
/* Discard cycles. */
|
||||||
cycle_timer_stop(&pmd->perf_stats, &timer);
|
cycle_timer_stop(&pmd->perf_stats, &timer);
|
||||||
@@ -3304,7 +3452,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
|||||||
|
|
||||||
pmd->ctx.last_rxq = NULL;
|
pmd->ctx.last_rxq = NULL;
|
||||||
|
|
||||||
return batch_cnt + output_cnt;
|
return batch_cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct tx_port *
|
static struct tx_port *
|
||||||
@@ -3360,6 +3508,7 @@ port_reconfigure(struct dp_netdev_port *port)
|
|||||||
}
|
}
|
||||||
|
|
||||||
port->rxqs[i].port = port;
|
port->rxqs[i].port = port;
|
||||||
|
port->rxqs[i].is_vhost = !strncmp(port->type, "dpdkvhost", 9);
|
||||||
|
|
||||||
err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
|
err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
|
||||||
if (err) {
|
if (err) {
|
||||||
@@ -4138,23 +4287,26 @@ reload:
|
|||||||
pmd->intrvl_tsc_prev = 0;
|
pmd->intrvl_tsc_prev = 0;
|
||||||
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
|
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
|
||||||
cycles_counter_update(s);
|
cycles_counter_update(s);
|
||||||
|
/* Protect pmd stats from external clearing while polling. */
|
||||||
|
ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
uint64_t iter_packets = 0;
|
uint64_t rx_packets = 0, tx_packets = 0;
|
||||||
|
|
||||||
pmd_perf_start_iteration(s);
|
pmd_perf_start_iteration(s);
|
||||||
|
|
||||||
for (i = 0; i < poll_cnt; i++) {
|
for (i = 0; i < poll_cnt; i++) {
|
||||||
process_packets =
|
process_packets =
|
||||||
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
|
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
|
||||||
poll_list[i].port_no);
|
poll_list[i].port_no);
|
||||||
iter_packets += process_packets;
|
rx_packets += process_packets;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!iter_packets) {
|
if (!rx_packets) {
|
||||||
/* We didn't receive anything in the process loop.
|
/* We didn't receive anything in the process loop.
|
||||||
* Check if we need to send something.
|
* Check if we need to send something.
|
||||||
* There was no time updates on current iteration. */
|
* There was no time updates on current iteration. */
|
||||||
pmd_thread_ctx_time_update(pmd);
|
pmd_thread_ctx_time_update(pmd);
|
||||||
iter_packets += dp_netdev_pmd_flush_output_packets(pmd, false);
|
tx_packets = dp_netdev_pmd_flush_output_packets(pmd, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lc++ > 1024) {
|
if (lc++ > 1024) {
|
||||||
@@ -4173,8 +4325,10 @@ reload:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pmd_perf_end_iteration(s, iter_packets);
|
pmd_perf_end_iteration(s, rx_packets, tx_packets,
|
||||||
|
pmd_perf_metrics_enabled(pmd));
|
||||||
}
|
}
|
||||||
|
ovs_mutex_unlock(&pmd->perf_stats.stats_mutex);
|
||||||
|
|
||||||
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
|
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
|
||||||
exiting = latch_is_set(&pmd->exit_latch);
|
exiting = latch_is_set(&pmd->exit_latch);
|
||||||
@@ -5069,6 +5223,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
|
|||||||
struct match match;
|
struct match match;
|
||||||
ovs_u128 ufid;
|
ovs_u128 ufid;
|
||||||
int error;
|
int error;
|
||||||
|
uint64_t cycles = cycles_counter_update(&pmd->perf_stats);
|
||||||
|
|
||||||
match.tun_md.valid = false;
|
match.tun_md.valid = false;
|
||||||
miniflow_expand(&key->mf, &match.flow);
|
miniflow_expand(&key->mf, &match.flow);
|
||||||
@@ -5122,6 +5277,14 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
|
|||||||
ovs_mutex_unlock(&pmd->flow_mutex);
|
ovs_mutex_unlock(&pmd->flow_mutex);
|
||||||
emc_probabilistic_insert(pmd, key, netdev_flow);
|
emc_probabilistic_insert(pmd, key, netdev_flow);
|
||||||
}
|
}
|
||||||
|
if (pmd_perf_metrics_enabled(pmd)) {
|
||||||
|
/* Update upcall stats. */
|
||||||
|
cycles = cycles_counter_update(&pmd->perf_stats) - cycles;
|
||||||
|
struct pmd_perf_stats *s = &pmd->perf_stats;
|
||||||
|
s->current.upcalls++;
|
||||||
|
s->current.upcall_cycles += cycles;
|
||||||
|
histogram_add_sample(&s->cycles_per_upcall, cycles);
|
||||||
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -252,6 +252,7 @@ vswitchd/ovs-vswitchd.8: \
|
|||||||
lib/coverage-unixctl.man \
|
lib/coverage-unixctl.man \
|
||||||
lib/daemon.man \
|
lib/daemon.man \
|
||||||
lib/dpctl.man \
|
lib/dpctl.man \
|
||||||
|
lib/dpif-netdev-unixctl.man \
|
||||||
lib/memory-unixctl.man \
|
lib/memory-unixctl.man \
|
||||||
lib/netdev-dpdk-unixctl.man \
|
lib/netdev-dpdk-unixctl.man \
|
||||||
lib/service.man \
|
lib/service.man \
|
||||||
@@ -269,6 +270,7 @@ lib/common.man:
|
|||||||
lib/coverage-unixctl.man:
|
lib/coverage-unixctl.man:
|
||||||
lib/daemon.man:
|
lib/daemon.man:
|
||||||
lib/dpctl.man:
|
lib/dpctl.man:
|
||||||
|
lib/dpif-netdev-unixctl.man:
|
||||||
lib/memory-unixctl.man:
|
lib/memory-unixctl.man:
|
||||||
lib/netdev-dpdk-unixctl.man:
|
lib/netdev-dpdk-unixctl.man:
|
||||||
lib/service.man:
|
lib/service.man:
|
||||||
|
@@ -264,32 +264,7 @@ type).
|
|||||||
..
|
..
|
||||||
.so lib/dpctl.man
|
.so lib/dpctl.man
|
||||||
.
|
.
|
||||||
.SS "DPIF-NETDEV COMMANDS"
|
.so lib/dpif-netdev-unixctl.man
|
||||||
These commands are used to expose internal information (mostly statistics)
|
|
||||||
about the ``dpif-netdev'' userspace datapath. If there is only one datapath
|
|
||||||
(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
|
|
||||||
argument can be omitted.
|
|
||||||
.IP "\fBdpif-netdev/pmd-stats-show\fR [\fIdp\fR]"
|
|
||||||
Shows performance statistics for each pmd thread of the datapath \fIdp\fR.
|
|
||||||
The special thread ``main'' sums up the statistics of every non pmd thread.
|
|
||||||
The sum of ``emc hits'', ``masked hits'' and ``miss'' is the number of
|
|
||||||
packets received by the datapath. Cycles are counted using the TSC or similar
|
|
||||||
facilities (when available on the platform). To reset these counters use
|
|
||||||
\fBdpif-netdev/pmd-stats-clear\fR. The duration of one cycle depends on the
|
|
||||||
measuring infrastructure. ``idle cycles'' refers to cycles spent polling
|
|
||||||
devices but not receiving any packets. ``processing cycles'' refers to cycles
|
|
||||||
spent polling devices and successfully receiving packets, plus the cycles
|
|
||||||
spent processing said packets.
|
|
||||||
.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
|
|
||||||
Resets to zero the per pmd thread performance numbers shown by the
|
|
||||||
\fBdpif-netdev/pmd-stats-show\fR command. It will NOT reset datapath or
|
|
||||||
bridge statistics, only the values shown by the above command.
|
|
||||||
.IP "\fBdpif-netdev/pmd-rxq-show\fR [\fIdp\fR]"
|
|
||||||
For each pmd thread of the datapath \fIdp\fR shows list of queue-ids with
|
|
||||||
port names, which this thread polls.
|
|
||||||
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
|
|
||||||
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
|
|
||||||
.
|
|
||||||
.so lib/netdev-dpdk-unixctl.man
|
.so lib/netdev-dpdk-unixctl.man
|
||||||
.so ofproto/ofproto-dpif-unixctl.man
|
.so ofproto/ofproto-dpif-unixctl.man
|
||||||
.so ofproto/ofproto-unixctl.man
|
.so ofproto/ofproto-unixctl.man
|
||||||
|
@@ -375,6 +375,18 @@
|
|||||||
</p>
|
</p>
|
||||||
</column>
|
</column>
|
||||||
|
|
||||||
|
<column name="other_config" key="pmd-perf-metrics"
|
||||||
|
type='{"type": "boolean"}'>
|
||||||
|
<p>
|
||||||
|
Enables recording of detailed PMD performance metrics for analysis
|
||||||
|
and trouble-shooting. This can have a performance impact in the
|
||||||
|
order of 1%.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Defaults to false but can be changed at any time.
|
||||||
|
</p>
|
||||||
|
</column>
|
||||||
|
|
||||||
<column name="other_config" key="n-handler-threads"
|
<column name="other_config" key="n-handler-threads"
|
||||||
type='{"type": "integer", "minInteger": 1}'>
|
type='{"type": "integer", "minInteger": 1}'>
|
||||||
<p>
|
<p>
|
||||||
|
Reference in New Issue
Block a user