
dpif-netdev: Detailed performance stats for PMDs

This patch instruments the dpif-netdev datapath to record detailed
statistics of what is happening in every iteration of a PMD thread.

The collection of detailed statistics can be controlled by a new
Open_vSwitch configuration parameter "other_config:pmd-perf-metrics".
By default it is disabled. The run-time overhead, when enabled, is
in the order of 1%.
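
For example, the collection can be enabled at run time with:

ovs-vsctl set Open_vSwitch . other_config:pmd-perf-metrics=true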

The covered metrics per iteration are:
  - cycles
  - packets
  - (rx) batches
  - packets/batch
  - max. vhostuser qlen
  - upcalls
  - cycles spent in upcalls

This raw recorded data is used threefold:

1. In histograms for each of the following metrics:
   - cycles/iteration (log.)
   - packets/iteration (log.)
   - cycles/packet
   - packets/batch
   - max. vhostuser qlen (log.)
   - upcalls
   - cycles/upcall (log.)
   The histogram bins are divided linearly or logarithmically.

2. A cyclic history of the above statistics for 999 iterations

3. A cyclic history of the cumulative/average values per millisecond
   wall clock for the last 1000 milliseconds:
   - number of iterations
   - avg. cycles/iteration
   - packets (Kpps)
   - avg. packets/batch
   - avg. max vhost qlen
   - upcalls
   - avg. cycles/upcall

The gathered performance metrics can be printed at any time with the
new CLI command

ovs-appctl dpif-netdev/pmd-perf-show [-nh] [-it iter_len] [-ms ms_len]
    [-pmd core] [dp]

The options are

-nh:            Suppress the histograms
-it iter_len:   Display the last iter_len iteration stats
-ms ms_len:     Display the last ms_len millisecond stats
-pmd core:      Display only the specified PMD
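
For example, to display the last 5 iteration and millisecond stats for the
PMD on core 3, suppressing the histograms, one could run:

ovs-appctl dpif-netdev/pmd-perf-show -nh -it 5 -ms 5 -pmd 3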

The performance statistics are reset with the existing
dpif-netdev/pmd-stats-clear command.

The output always contains the following global PMD statistics,
similar to the pmd-stats-show command:

Time: 15:24:55.270
Measurement duration: 1.008 s

pmd thread numa_id 0 core_id 1:

  Cycles:            2419034712  (2.40 GHz)
  Iterations:            572817  (1.76 us/it)
  - idle:                486808  (15.9 % cycles)
  - busy:                 86009  (84.1 % cycles)
  Rx packets:           2399607  (2381 Kpps, 848 cycles/pkt)
  Datapath passes:      3599415  (1.50 passes/pkt)
  - EMC hits:            336472  ( 9.3 %)
  - Megaflow hits:      3262943  (90.7 %, 1.00 subtbl lookups/hit)
  - Upcalls:                  0  ( 0.0 %, 0.0 us/upcall)
  - Lost upcalls:             0  ( 0.0 %)
  Tx packets:           2399607  (2381 Kpps)
  Tx batches:            171400  (14.00 pkts/batch)

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Acked-by: Billy O'Mahony <billy.o.mahony@intel.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>
Author: Jan Scheurich, 2018-04-19 19:40:45 +02:00
Committed by: Ian Stokes
Commit: 79f368756c (parent: 8492adc270)
9 changed files with 990 additions and 68 deletions

NEWS

@@ -27,6 +27,10 @@ Post-v2.9.0
- DPDK:
* New 'check-dpdk' Makefile target to run a new system testsuite.
See Testing topic for the details.
- Userspace datapath:
* Commands ovs-appctl dpif-netdev/pmd-*-show can now work on a single PMD
* Detailed PMD performance metrics available with new command
ovs-appctl dpif-netdev/pmd-perf-show
v2.9.0 - 19 Feb 2018
--------------------

lib/automake.mk

@@ -492,6 +492,7 @@ MAN_FRAGMENTS += \
lib/dpctl.man \
lib/memory-unixctl.man \
lib/netdev-dpdk-unixctl.man \
lib/dpif-netdev-unixctl.man \
lib/ofp-version.man \
lib/ovs.tmac \
lib/service.man \

lib/dpif-netdev-perf.c

@@ -15,18 +15,333 @@
*/
#include <config.h>
#include <stdint.h>
#include "dpif-netdev-perf.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/vlog.h"
#include "dpif-netdev-perf.h"
#include "ovs-thread.h"
#include "timeval.h"
VLOG_DEFINE_THIS_MODULE(pmd_perf);
#ifdef DPDK_NETDEV
static uint64_t
get_tsc_hz(void)
{
return rte_get_tsc_hz();
}
#else
/* This function is only invoked from PMD threads which depend on DPDK.
* A dummy function is sufficient when building without DPDK_NETDEV. */
static uint64_t
get_tsc_hz(void)
{
return 1;
}
#endif
/* Histogram functions. */
static void
histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
{
int i;
ovs_assert(min < max);
for (i = 0; i < NUM_BINS-1; i++) {
hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
}
hist->wall[NUM_BINS-1] = UINT32_MAX;
}
static void
histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
{
int i, start, bins, wall;
double log_min, log_max;
ovs_assert(min < max);
if (min > 0) {
log_min = log(min);
log_max = log(max);
start = 0;
bins = NUM_BINS - 1;
} else {
hist->wall[0] = 0;
log_min = log(1);
log_max = log(max);
start = 1;
bins = NUM_BINS - 2;
}
wall = start;
for (i = 0; i < bins; i++) {
/* Make sure each wall is monotonically increasing. */
wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / (bins-1)));
hist->wall[start + i] = wall++;
}
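/* Example (illustration, not part of the original patch): for min 0 and
* max 1000 the first log-spaced wall values all truncate to 1 or 2; the
* MAX() above combined with wall++ forces strictly increasing walls
* 1, 2, 3, ... until the exponential curve overtakes them. */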
if (hist->wall[NUM_BINS-2] < max) {
hist->wall[NUM_BINS-2] = max;
}
hist->wall[NUM_BINS-1] = UINT32_MAX;
}
uint64_t
histogram_samples(const struct histogram *hist)
{
uint64_t samples = 0;
for (int i = 0; i < NUM_BINS; i++) {
samples += hist->bin[i];
}
return samples;
}
static void
histogram_clear(struct histogram *hist)
{
int i;
for (i = 0; i < NUM_BINS; i++) {
hist->bin[i] = 0;
}
}
static void
history_init(struct history *h)
{
memset(h, 0, sizeof(*h));
}
void
pmd_perf_stats_init(struct pmd_perf_stats *s)
{
memset(s, 0, sizeof(*s));
ovs_mutex_init(&s->stats_mutex);
ovs_mutex_init(&s->clear_mutex);
/* Logarithmic histogram for cycles/it ranging from 500 to 24M
* (corresponding to 200 ns to 9.6 ms at 2.5 GHz TSC clock). */
histogram_walls_set_log(&s->cycles, 500, 24000000);
/* Logarithmic histogram for pkts/it ranging from 0 to 1000. */
histogram_walls_set_log(&s->pkts, 0, 1000);
/* Linear histogram for cycles/pkt ranging from 100 to 30K. */
histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
/* Linear histogram for pkts/rx batch ranging from 0 to 32,
* the maximum rx batch size in OVS. */
histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
/* Linear histogram for upcalls/it ranging from 0 to 30. */
histogram_walls_set_lin(&s->upcalls, 0, 30);
/* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M
* (corresponding to 400 ns to 400 us at 2.5 GHz TSC clock). */
histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
/* Log. histogram for max vhostuser queue fill level from 0 to 512.
* 512 is the maximum fill level for a virtio queue with 1024
* descriptors (maximum configurable length in Qemu), with the
* DPDK 17.11 virtio PMD in the guest. */
histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
s->iteration_cnt = 0;
s->start_ms = time_msec();
}
void
pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
double duration)
{
uint64_t stats[PMD_N_STATS];
double us_per_cycle = 1000000.0 / get_tsc_hz();
if (duration == 0) {
return;
}
pmd_perf_read_counters(s, stats);
uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
stats[PMD_CYCLES_ITER_BUSY];
uint64_t rx_packets = stats[PMD_STAT_RECV];
uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS];
uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES];
uint64_t passes = stats[PMD_STAT_RECV] +
stats[PMD_STAT_RECIRC];
uint64_t upcalls = stats[PMD_STAT_MISS];
uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
uint64_t tot_iter = histogram_samples(&s->pkts);
uint64_t idle_iter = s->pkts.bin[0];
uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;
ds_put_format(str,
" Cycles: %12"PRIu64" (%.2f GHz)\n"
" Iterations: %12"PRIu64" (%.2f us/it)\n"
" - idle: %12"PRIu64" (%4.1f %% cycles)\n"
" - busy: %12"PRIu64" (%4.1f %% cycles)\n",
tot_cycles, (tot_cycles / duration) / 1E9,
tot_iter, tot_cycles * us_per_cycle / tot_iter,
idle_iter,
100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
busy_iter,
100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
if (rx_packets > 0) {
ds_put_format(str,
" Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n"
" Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n"
" - EMC hits: %12"PRIu64" (%4.1f %%)\n"
" - Megaflow hits: %12"PRIu64" (%4.1f %%, %.2f subtbl lookups/"
"hit)\n"
" - Upcalls: %12"PRIu64" (%4.1f %%, %.1f us/upcall)\n"
" - Lost upcalls: %12"PRIu64" (%4.1f %%)\n",
rx_packets, (rx_packets / duration) / 1000,
1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
passes, rx_packets ? 1.0 * passes / rx_packets : 0,
stats[PMD_STAT_EXACT_HIT],
100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
stats[PMD_STAT_MASKED_HIT],
100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
stats[PMD_STAT_MASKED_HIT]
? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT]
: 0,
upcalls, 100.0 * upcalls / passes,
upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
stats[PMD_STAT_LOST],
100.0 * stats[PMD_STAT_LOST] / passes);
} else {
ds_put_format(str, " Rx packets: %12d\n", 0);
}
if (tx_packets > 0) {
ds_put_format(str,
" Tx packets: %12"PRIu64" (%.0f Kpps)\n"
" Tx batches: %12"PRIu64" (%.2f pkts/batch)"
"\n",
tx_packets, (tx_packets / duration) / 1000,
tx_batches, 1.0 * tx_packets / tx_batches);
} else {
ds_put_format(str, " Tx packets: %12d\n\n", 0);
}
}
void
pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
{
int i;
ds_put_cstr(str, "Histograms\n");
ds_put_format(str,
" %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
"max vhost qlen", "upcalls/it", "cycles/upcall");
for (i = 0; i < NUM_BINS-1; i++) {
ds_put_format(str,
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
" %-9d %-11"PRIu64"\n",
s->cycles.wall[i], s->cycles.bin[i],
s->pkts.wall[i], s->pkts.bin[i],
s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
s->upcalls.wall[i], s->upcalls.bin[i],
s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
}
ds_put_format(str,
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
" %-9s %-11"PRIu64"\n",
">", s->cycles.bin[i],
">", s->pkts.bin[i],
">", s->cycles_per_pkt.bin[i],
">", s->pkts_per_batch.bin[i],
">", s->max_vhost_qfill.bin[i],
">", s->upcalls.bin[i],
">", s->cycles_per_upcall.bin[i]);
if (s->totals.iterations > 0) {
ds_put_cstr(str,
"-----------------------------------------------------"
"-----------------------------------------------------"
"------------------------------------------------\n");
ds_put_format(str,
" %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
"vhost qlen", "upcalls/it", "cycles/upcall");
ds_put_format(str,
" %-21"PRIu64" %-21.5f %-21"PRIu64
" %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
s->totals.cycles / s->totals.iterations,
1.0 * s->totals.pkts / s->totals.iterations,
s->totals.pkts
? s->totals.busy_cycles / s->totals.pkts : 0,
s->totals.batches
? 1.0 * s->totals.pkts / s->totals.batches : 0,
1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
1.0 * s->totals.upcalls / s->totals.iterations,
s->totals.upcalls
? s->totals.upcall_cycles / s->totals.upcalls : 0);
}
}
void
pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
int n_iter)
{
struct iter_stats *is;
size_t index;
int i;
if (n_iter == 0) {
return;
}
ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
"%-10s %-10s %-10s\n",
"iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
"vhost qlen", "upcalls", "cycles/upcall");
for (i = 1; i <= n_iter; i++) {
index = history_sub(s->iterations.idx, i);
is = &s->iterations.sample[index];
ds_put_format(str,
" %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
" %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
" %-11"PRIu32" %-11"PRIu32"\n",
is->timestamp,
is->cycles,
is->pkts,
is->pkts ? is->cycles / is->pkts : 0,
is->batches ? is->pkts / is->batches : 0,
is->max_vhost_qfill,
is->upcalls,
is->upcalls ? is->upcall_cycles / is->upcalls : 0);
}
}
void
pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
{
struct iter_stats *is;
size_t index;
int i;
if (n_ms == 0) {
return;
}
ds_put_format(str,
" %-12s %-10s %-10s %-10s %-10s"
" %-10s %-10s %-10s %-10s\n",
"ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
"pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
for (i = 1; i <= n_ms; i++) {
index = history_sub(s->milliseconds.idx, i);
is = &s->milliseconds.sample[index];
ds_put_format(str,
" %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
" %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
" %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
is->timestamp,
is->iterations,
is->iterations ? is->cycles / is->iterations : 0,
is->pkts,
is->pkts ? is->busy_cycles / is->pkts : 0,
is->batches ? is->pkts / is->batches : 0,
is->iterations
? is->max_vhost_qfill / is->iterations : 0,
is->upcalls,
is->upcalls ? is->upcall_cycles / is->upcalls : 0);
}
}
void
@@ -51,10 +366,152 @@ pmd_perf_read_counters(struct pmd_perf_stats *s,
}
}
/* This function clears the PMD performance counters from within the PMD
* thread or from another thread when the PMD thread is not executing its
* poll loop. */
void
pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
OVS_REQUIRES(s->stats_mutex)
{
ovs_mutex_lock(&s->clear_mutex);
for (int i = 0; i < PMD_N_STATS; i++) {
atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
}
/* The following stats are only applicable in the PMD thread. */
memset(&s->current, 0, sizeof(struct iter_stats));
memset(&s->totals, 0, sizeof(struct iter_stats));
histogram_clear(&s->cycles);
histogram_clear(&s->pkts);
histogram_clear(&s->cycles_per_pkt);
histogram_clear(&s->upcalls);
histogram_clear(&s->cycles_per_upcall);
histogram_clear(&s->pkts_per_batch);
histogram_clear(&s->max_vhost_qfill);
history_init(&s->iterations);
history_init(&s->milliseconds);
s->start_ms = time_msec();
s->milliseconds.sample[0].timestamp = s->start_ms;
/* Clearing finished. */
s->clear = false;
ovs_mutex_unlock(&s->clear_mutex);
}
/* This function can be called from anywhere to clear the stats
* of PMD and non-PMD threads. */
void
pmd_perf_stats_clear(struct pmd_perf_stats *s)
{
if (ovs_mutex_trylock(&s->stats_mutex) == 0) {
/* Locking successful. PMD not polling. */
pmd_perf_stats_clear_lock(s);
ovs_mutex_unlock(&s->stats_mutex);
} else {
/* Request the polling PMD to clear the stats. There is no need to
* block here as stats retrieval is prevented during clearing. */
s->clear = true;
}
}
/* Functions recording PMD metrics per iteration. */
inline void
pmd_perf_start_iteration(struct pmd_perf_stats *s)
OVS_REQUIRES(s->stats_mutex)
{
if (s->clear) {
/* Clear the PMD stats before starting next iteration. */
pmd_perf_stats_clear_lock(s);
}
s->iteration_cnt++;
/* Initialize the current interval stats. */
memset(&s->current, 0, sizeof(struct iter_stats));
if (OVS_LIKELY(s->last_tsc)) {
/* We assume here that last_tsc was updated immediately prior at
* the end of the previous iteration, or just before the first
* iteration. */
s->start_tsc = s->last_tsc;
} else {
/* In case last_tsc has never been set before. */
s->start_tsc = cycles_counter_update(s);
}
}
inline void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
int tx_packets, bool full_metrics)
{
uint64_t now_tsc = cycles_counter_update(s);
struct iter_stats *cum_ms;
uint64_t cycles, cycles_per_pkt = 0;
cycles = now_tsc - s->start_tsc;
s->current.timestamp = s->iteration_cnt;
s->current.cycles = cycles;
s->current.pkts = rx_packets;
if (rx_packets + tx_packets > 0) {
pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
} else {
pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
}
/* Add iteration samples to histograms. */
histogram_add_sample(&s->cycles, cycles);
histogram_add_sample(&s->pkts, rx_packets);
if (!full_metrics) {
return;
}
s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;
if (rx_packets > 0) {
cycles_per_pkt = cycles / rx_packets;
histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
}
if (s->current.batches > 0) {
histogram_add_sample(&s->pkts_per_batch,
rx_packets / s->current.batches);
}
histogram_add_sample(&s->upcalls, s->current.upcalls);
if (s->current.upcalls > 0) {
histogram_add_sample(&s->cycles_per_upcall,
s->current.upcall_cycles / s->current.upcalls);
}
histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
/* Add iteration samples to millisecond stats. */
cum_ms = history_current(&s->milliseconds);
cum_ms->iterations++;
cum_ms->cycles += cycles;
if (rx_packets > 0) {
cum_ms->busy_cycles += cycles;
}
cum_ms->pkts += s->current.pkts;
cum_ms->upcalls += s->current.upcalls;
cum_ms->upcall_cycles += s->current.upcall_cycles;
cum_ms->batches += s->current.batches;
cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
/* Store in iteration history. This advances the iteration idx and
* clears the next slot in the iteration history. */
history_store(&s->iterations, &s->current);
if (now_tsc > s->next_check_tsc) {
/* Check if ms is completed and store in milliseconds history. */
uint64_t now = time_msec();
if (now != cum_ms->timestamp) {
/* Add ms stats to totals. */
s->totals.iterations += cum_ms->iterations;
s->totals.cycles += cum_ms->cycles;
s->totals.busy_cycles += cum_ms->busy_cycles;
s->totals.pkts += cum_ms->pkts;
s->totals.upcalls += cum_ms->upcalls;
s->totals.upcall_cycles += cum_ms->upcall_cycles;
s->totals.batches += cum_ms->batches;
s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
cum_ms = history_next(&s->milliseconds);
cum_ms->timestamp = now;
}
/* Do the next check after 10K cycles (4 us at 2.5 GHz TSC clock). */
s->next_check_tsc = cycles_counter_update(s) + 10000;
}
}

lib/dpif-netdev-perf.h

@@ -38,10 +38,18 @@
extern "C" {
#endif
/* This module encapsulates data structures and functions to maintain basic PMD
* performance metrics such as packet counters, execution cycles as well as
* histograms and time series recording for more detailed PMD metrics.
*
* It provides a clean API for dpif-netdev to initialize, update and read and
* reset these metrics.
*
* The basic set of PMD counters is implemented as atomic_uint64_t variables
* to guarantee correct reads also on 32-bit systems.
*
* The detailed PMD performance metrics are only supported on 64-bit systems
* with atomic 64-bit read and store semantics for plain uint64_t counters.
*/
/* Set of counter types maintained in pmd_perf_stats. */
@@ -66,6 +74,7 @@ enum pmd_stat_type {
PMD_STAT_SENT_BATCHES, /* Number of batches sent. */
PMD_CYCLES_ITER_IDLE, /* Cycles spent in idle iterations. */
PMD_CYCLES_ITER_BUSY, /* Cycles spent in busy iterations. */
PMD_CYCLES_UPCALL, /* Cycles spent processing upcalls. */
PMD_N_STATS
};
@@ -81,18 +90,91 @@ struct pmd_counters {
uint64_t zero[PMD_N_STATS]; /* Value at last _clear(). */
};
/* Data structure to collect the statistical distribution of an integer
* measurement type in the form of a histogram. The wall[] array contains
* the inclusive upper boundaries of the bins, while the bin[] array contains
* the actual counters per bin. The histogram walls are typically set
* automatically using the functions provided below. */
#define NUM_BINS 32 /* Number of histogram bins. */
struct histogram {
uint32_t wall[NUM_BINS];
uint64_t bin[NUM_BINS];
};
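/* Illustration (not part of the original patch): with wall[0] = 10 and
* wall[1] = 20, a sample of value 15 is counted in bin[1], as bin i covers
* the range (wall[i-1], wall[i]]. */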
/* Data structure to record detailed PMD execution metrics per iteration
* for a history period of up to HISTORY_LEN iterations in a circular
* buffer. Also used to record up to HISTORY_LEN millisecond averages/totals
* of these metrics. */
struct iter_stats {
uint64_t timestamp; /* Iteration no. or millisecond. */
uint64_t cycles; /* Number of TSC cycles spent in it. or ms. */
uint64_t busy_cycles; /* Cycles spent in busy iterations or ms. */
uint32_t iterations; /* Iterations in ms. */
uint32_t pkts; /* Packets processed in iteration or ms. */
uint32_t upcalls; /* Number of upcalls in iteration or ms. */
uint32_t upcall_cycles; /* Cycles spent in upcalls in it. or ms. */
uint32_t batches; /* Number of rx batches in iteration or ms. */
uint32_t max_vhost_qfill; /* Maximum fill level in iteration or ms. */
};
#define HISTORY_LEN 1000 /* Length of recorded history
(iterations and ms). */
#define DEF_HIST_SHOW 20 /* Default number of history samples to
display. */
struct history {
size_t idx; /* Slot to which next call to history_store()
will write. */
struct iter_stats sample[HISTORY_LEN];
};
/* Container for all performance metrics of a PMD within the struct
* dp_netdev_pmd_thread. The metrics must be updated from within the PMD
* thread but can be read from any thread. The basic PMD counters in
* struct pmd_counters can be read without protection against concurrent
* clearing. The other metrics may only be safely read with the clear_mutex
* held to protect against concurrent clearing. */
struct pmd_perf_stats {
/* Prevents interference between PMD polling and stats clearing. */
struct ovs_mutex stats_mutex;
/* Set by CLI thread to order clearing of PMD stats. */
volatile bool clear;
/* Prevents stats retrieval while clearing is in progress. */
struct ovs_mutex clear_mutex;
/* Start of the current performance measurement period. */
uint64_t start_ms;
/* Counter for PMD iterations. */
uint64_t iteration_cnt;
/* Start of the current iteration. */
uint64_t start_tsc;
/* Latest TSC time stamp taken in PMD. */
uint64_t last_tsc;
/* Used to space certain checks in time. */
uint64_t next_check_tsc;
/* If non-NULL, outermost cycle timer currently running in PMD. */
struct cycle_timer *cur_timer;
/* Set of PMD counters with their zero offsets. */
struct pmd_counters counters;
/* Statistics of the current iteration. */
struct iter_stats current;
/* Totals for the current millisecond. */
struct iter_stats totals;
/* Histograms for the PMD metrics. */
struct histogram cycles;
struct histogram pkts;
struct histogram cycles_per_pkt;
struct histogram upcalls;
struct histogram cycles_per_upcall;
struct histogram pkts_per_batch;
struct histogram max_vhost_qfill;
/* Iteration history buffer. */
struct history iterations;
/* Millisecond history buffer. */
struct history milliseconds;
};
/* Support for accurate timing of PMD execution on TSC clock cycle level.
@@ -175,8 +257,14 @@ cycle_timer_stop(struct pmd_perf_stats *s,
return now - timer->start;
}
/* Functions to initialize and reset the PMD performance metrics. */
void pmd_perf_stats_init(struct pmd_perf_stats *s);
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);
/* Functions to read and update PMD counters. */
void pmd_perf_read_counters(struct pmd_perf_stats *s,
uint64_t stats[PMD_N_STATS]);
@@ -199,32 +287,95 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
atomic_store_relaxed(&s->counters.n[counter], tmp);
}
/* Functions to manipulate a sample history. */
static inline void
histogram_add_sample(struct histogram *hist, uint32_t val)
{
/* TODO: Can do better with binary search? */
for (int i = 0; i < NUM_BINS-1; i++) {
if (val <= hist->wall[i]) {
hist->bin[i]++;
return;
}
}
hist->bin[NUM_BINS-1]++;
}
uint64_t histogram_samples(const struct histogram *hist);
/* This function is used to advance the given history index by a positive
* offset in the circular history buffer. */
static inline uint32_t
history_add(uint32_t idx, uint32_t offset)
{
return (idx + offset) % HISTORY_LEN;
}
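/* Example (illustration, not part of the original patch): with HISTORY_LEN
* 1000, history_add(999, 1) == 0, i.e. the index wraps around to the start
* of the circular buffer. */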
/* This function computes the difference between two indices into the
* circular history buffer. The result is always positive in the range
* 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
* starting from idx2. It can also be used to retreat the history index
* idx1 by idx2 steps. */
static inline uint32_t
history_sub(uint32_t idx1, uint32_t idx2)
{
return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
}
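/* Example (illustration, not part of the original patch): with HISTORY_LEN
* 1000, history_sub(5, 10) == 995: starting from index 10, 995 forward
* steps reach index 5; equivalently, retreating index 5 by 10 steps
* yields 995. */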
static inline struct iter_stats *
history_current(struct history *h)
{
return &h->sample[h->idx];
}
static inline struct iter_stats *
history_next(struct history *h)
{
size_t next_idx = history_add(h->idx, 1);
struct iter_stats *next = &h->sample[next_idx];
memset(next, 0, sizeof(*next));
h->idx = next_idx;
return next;
}
static inline struct iter_stats *
history_store(struct history *h, struct iter_stats *is)
{
if (is) {
h->sample[h->idx] = *is;
}
/* Advance the history pointer */
return history_next(h);
}
/* Functions recording PMD metrics per iteration. */
void
pmd_perf_start_iteration(struct pmd_perf_stats *s);
void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
int tx_packets, bool full_metrics);
/* Formatting the output of commands. */
struct pmd_perf_params {
int command_type;
bool histograms;
size_t iter_hist_len;
size_t ms_hist_len;
};
void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
double duration);
void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
void pmd_perf_format_iteration_history(struct ds *str,
struct pmd_perf_stats *s,
int n_iter);
void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
int n_ms);
#ifdef __cplusplus
}
#endif

lib/dpif-netdev-unixctl.man (new file)

@@ -0,0 +1,157 @@
.SS "DPIF-NETDEV COMMANDS"
These commands are used to expose internal information (mostly statistics)
about the "dpif-netdev" userspace datapath. If there is only one datapath
(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
argument can be omitted. By default the commands present data for all pmd
threads in the datapath. By specifying the "-pmd core" option one can filter
the output for a single pmd in the datapath.
.
.IP "\fBdpif-netdev/pmd-stats-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
Shows performance statistics for one or all pmd threads of the datapath
\fIdp\fR. The special thread "main" sums up the statistics of every non pmd
thread.
The sum of "emc hits", "masked hits" and "miss" is the number of
packet lookups performed by the datapath. Beware that a recirculated packet
experiences one additional lookup per recirculation, so there may be
more lookups than forwarded packets in the datapath.
Cycles are counted using the TSC or similar facilities (when available on
the platform). The duration of one cycle depends on the processing platform.
"idle cycles" refers to cycles spent in PMD iterations not forwarding any
any packets. "processing cycles" refers to cycles spent in PMD iterations
forwarding at least one packet, including the cost for polling, processing and
transmitting said packets.
To reset these counters use \fBdpif-netdev/pmd-stats-clear\fR.
.
.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
Resets to zero the per pmd thread performance numbers shown by the
\fBdpif-netdev/pmd-stats-show\fR and \fBdpif-netdev/pmd-perf-show\fR commands.
It will NOT reset datapath or bridge statistics, only the values shown by
the above commands.
.
.IP "\fBdpif-netdev/pmd-perf-show\fR [\fB-nh\fR] [\fB-it\fR \fIiter_len\fR] \
[\fB-ms\fR \fIms_len\fR] [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
Shows detailed performance metrics for one or all pmd threads of the
userspace datapath.
The collection of detailed statistics can be controlled by a new
configuration parameter "other_config:pmd-perf-metrics". By default it
is disabled. The run-time overhead, when enabled, is in the order of 1%.
The covered metrics per iteration are:
.RS
.IP
.PD .4v
.IP \(em
used cycles
.IP \(em
forwarded packets
.IP \(em
number of rx batches
.IP \(em
packets/rx batch
.IP \(em
max. vhostuser queue fill level
.IP \(em
number of upcalls
.IP \(em
cycles spent in upcalls
.PD
.RE
.IP
This raw recorded data is used threefold:
.RS
.IP
.PD .4v
.IP 1.
In histograms for each of the following metrics:
.RS
.IP \(em
cycles/iteration (logarithmic)
.IP \(em
packets/iteration (logarithmic)
.IP \(em
cycles/packet
.IP \(em
packets/batch
.IP \(em
max. vhostuser qlen (logarithmic)
.IP \(em
upcalls
.IP \(em
cycles/upcall (logarithmic)
The histogram bins are divided linearly or logarithmically.
.RE
.IP 2.
A cyclic history of the above metrics for 1000 iterations
.IP 3.
A cyclic history of the cumulative/average values per millisecond wall
clock for the last 1000 milliseconds:
.RS
.IP \(em
number of iterations
.IP \(em
avg. cycles/iteration
.IP \(em
packets (Kpps)
.IP \(em
avg. packets/batch
.IP \(em
avg. max vhost qlen
.IP \(em
upcalls
.IP \(em
avg. cycles/upcall
.RE
.PD
.RE
.IP
.
The command options are:
.RS
.IP "\fB-nh\fR"
Suppress the histograms
.IP "\fB-it\fR \fIiter_len\fR"
Display the last iter_len iteration stats
.IP "\fB-ms\fR \fIms_len\fR"
Display the last ms_len millisecond stats
.RE
.IP
The output always contains the following global PMD statistics:
.RS
.IP
Time: 15:24:55.270
.br
Measurement duration: 1.008 s
pmd thread numa_id 0 core_id 1:
Cycles: 2419034712 (2.40 GHz)
Iterations: 572817 (1.76 us/it)
- idle: 486808 (15.9 % cycles)
- busy: 86009 (84.1 % cycles)
Rx packets: 2399607 (2381 Kpps, 848 cycles/pkt)
Datapath passes: 3599415 (1.50 passes/pkt)
- EMC hits: 336472 ( 9.3 %)
- Megaflow hits: 3262943 (90.7 %, 1.00 subtbl lookups/hit)
- Upcalls: 0 ( 0.0 %, 0.0 us/upcall)
- Lost upcalls: 0 ( 0.0 %)
Tx packets: 2399607 (2381 Kpps)
Tx batches: 171400 (14.00 pkts/batch)
.RE
.IP
Here "Rx packets" actually reflects the number of packets forwarded by the
datapath. "Datapath passes" matches the number of packet lookups as
reported by the \fBdpif-netdev/pmd-stats-show\fR command.
To reset the counters and start a new measurement use
\fBdpif-netdev/pmd-stats-clear\fR.
.
.IP "\fBdpif-netdev/pmd-rxq-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
For one or all pmd threads of the datapath \fIdp\fR show the list of queue-ids
with port names, which this thread polls.
.
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.

lib/dpif-netdev.c

@@ -49,6 +49,7 @@
#include "id-pool.h"
#include "latch.h"
#include "netdev.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-execute.h"
@@ -281,6 +282,8 @@ struct dp_netdev {
/* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
/* Enable collection of PMD performance metrics. */
atomic_bool pmd_perf_metrics;
/* Protects access to ofproto-dpif-upcall interface during revalidator
* thread synchronization. */
@@ -356,6 +359,7 @@ struct dp_netdev_rxq {
particular core. */
unsigned intrvl_idx; /* Write index for 'cycles_intrvl'. */
struct dp_netdev_pmd_thread *pmd; /* pmd thread that polls this queue. */
bool is_vhost; /* Is rxq of a vhost port. */
/* Counters of cycles spent successfully polling and processing pkts. */
atomic_ullong cycles[RXQ_N_CYCLES];
@@ -717,6 +721,8 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
static inline bool
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
static void
emc_cache_init(struct emc_cache *flow_cache)
@@ -800,7 +806,8 @@ get_dp_netdev(const struct dpif *dpif)
enum pmd_info_type {
PMD_INFO_SHOW_STATS, /* Show how cpu cycles are spent. */
PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
PMD_INFO_SHOW_RXQ, /* Show poll lists of pmd threads. */
PMD_INFO_PERF_SHOW, /* Show pmd performance details. */
};
static void
@@ -891,6 +898,47 @@ pmd_info_show_stats(struct ds *reply,
stats[PMD_CYCLES_ITER_BUSY], total_packets);
}
static void
pmd_info_show_perf(struct ds *reply,
struct dp_netdev_pmd_thread *pmd,
struct pmd_perf_params *par)
{
if (pmd->core_id != NON_PMD_CORE_ID) {
char *time_str =
xastrftime_msec("%H:%M:%S.###", time_wall_msec(), true);
long long now = time_msec();
double duration = (now - pmd->perf_stats.start_ms) / 1000.0;
ds_put_cstr(reply, "\n");
ds_put_format(reply, "Time: %s\n", time_str);
ds_put_format(reply, "Measurement duration: %.3f s\n", duration);
ds_put_cstr(reply, "\n");
format_pmd_thread(reply, pmd);
ds_put_cstr(reply, "\n");
pmd_perf_format_overall_stats(reply, &pmd->perf_stats, duration);
if (pmd_perf_metrics_enabled(pmd)) {
/* Prevent parallel clearing of perf metrics. */
ovs_mutex_lock(&pmd->perf_stats.clear_mutex);
if (par->histograms) {
ds_put_cstr(reply, "\n");
pmd_perf_format_histograms(reply, &pmd->perf_stats);
}
if (par->iter_hist_len > 0) {
ds_put_cstr(reply, "\n");
pmd_perf_format_iteration_history(reply, &pmd->perf_stats,
par->iter_hist_len);
}
if (par->ms_hist_len > 0) {
ds_put_cstr(reply, "\n");
pmd_perf_format_ms_history(reply, &pmd->perf_stats,
par->ms_hist_len);
}
ovs_mutex_unlock(&pmd->perf_stats.clear_mutex);
}
free(time_str);
}
}
static int
compare_poll_list(const void *a_, const void *b_)
{
@@ -1068,7 +1116,7 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
ovs_mutex_lock(&dp_netdev_mutex);
while (argc > 1) {
if (!strcmp(argv[1], "-pmd") && argc >= 3) {
if (!strcmp(argv[1], "-pmd") && argc > 2) {
if (str_to_uint(argv[2], 10, &core_id)) {
filter_on_pmd = true;
}
@@ -1108,6 +1156,8 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
pmd_perf_stats_clear(&pmd->perf_stats);
} else if (type == PMD_INFO_SHOW_STATS) {
pmd_info_show_stats(&reply, pmd);
} else if (type == PMD_INFO_PERF_SHOW) {
pmd_info_show_perf(&reply, pmd, (struct pmd_perf_params *)aux);
}
}
free(pmd_list);
@@ -1117,6 +1167,48 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
unixctl_command_reply(conn, ds_cstr(&reply));
ds_destroy(&reply);
}
static void
pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
const char *argv[],
void *aux OVS_UNUSED)
{
struct pmd_perf_params par;
long int it_hist = 0, ms_hist = 0;
par.histograms = true;
while (argc > 1) {
if (!strcmp(argv[1], "-nh")) {
par.histograms = false;
argc -= 1;
argv += 1;
} else if (!strcmp(argv[1], "-it") && argc > 2) {
it_hist = strtol(argv[2], NULL, 10);
if (it_hist < 0) {
it_hist = 0;
} else if (it_hist > HISTORY_LEN) {
it_hist = HISTORY_LEN;
}
argc -= 2;
argv += 2;
} else if (!strcmp(argv[1], "-ms") && argc > 2) {
ms_hist = strtol(argv[2], NULL, 10);
if (ms_hist < 0) {
ms_hist = 0;
} else if (ms_hist > HISTORY_LEN) {
ms_hist = HISTORY_LEN;
}
argc -= 2;
argv += 2;
} else {
break;
}
}
par.iter_hist_len = it_hist;
par.ms_hist_len = ms_hist;
par.command_type = PMD_INFO_PERF_SHOW;
dpif_netdev_pmd_info(conn, argc, argv, &par);
}
static int
dpif_netdev_init(void)
@@ -1134,6 +1226,12 @@ dpif_netdev_init(void)
unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core] [dp]",
0, 3, dpif_netdev_pmd_info,
(void *)&poll_aux);
unixctl_command_register("dpif-netdev/pmd-perf-show",
"[-nh] [-it iter-history-len]"
" [-ms ms-history-len]"
" [-pmd core] [dp]",
0, 8, pmd_perf_show_cmd,
NULL);
unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
0, 1, dpif_netdev_pmd_rebalance,
NULL);
@@ -3021,6 +3119,18 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
}
}
bool perf_enabled = smap_get_bool(other_config, "pmd-perf-metrics", false);
bool cur_perf_enabled;
atomic_read_relaxed(&dp->pmd_perf_metrics, &cur_perf_enabled);
if (perf_enabled != cur_perf_enabled) {
atomic_store_relaxed(&dp->pmd_perf_metrics, perf_enabled);
if (perf_enabled) {
VLOG_INFO("PMD performance metrics collection enabled");
} else {
VLOG_INFO("PMD performance metrics collection disabled");
}
}
return 0;
}
@@ -3190,6 +3300,25 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
return processing_cycles;
}
#if ATOMIC_ALWAYS_LOCK_FREE_8B
static inline bool
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd)
{
bool pmd_perf_enabled;
atomic_read_relaxed(&pmd->dp->pmd_perf_metrics, &pmd_perf_enabled);
return pmd_perf_enabled;
}
#else
/* If stores and reads of 64-bit integers are not atomic, the full PMD
* performance metrics are not available as locked access to 64 bit
* integers would be prohibitively expensive. */
static inline bool
pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
{
return false;
}
#endif
static int
dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
struct tx_port *p)
@@ -3265,10 +3394,12 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_rxq *rxq,
odp_port_t port_no)
{
struct pmd_perf_stats *s = &pmd->perf_stats;
struct dp_packet_batch batch;
struct cycle_timer timer;
int error;
int batch_cnt = 0;
int rem_qlen = 0, *qlen_p = NULL;
uint64_t cycles;
/* Measure duration for polling and processing rx burst. */
@@ -3277,20 +3408,37 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
pmd->ctx.last_rxq = rxq;
dp_packet_batch_init(&batch);
/* Fetch the rx queue length only for vhostuser ports. */
if (pmd_perf_metrics_enabled(pmd) && rxq->is_vhost) {
qlen_p = &rem_qlen;
}
error = netdev_rxq_recv(rxq->rx, &batch, qlen_p);
if (!error) {
/* At least one packet received. */
*recirc_depth_get() = 0;
pmd_thread_ctx_time_update(pmd);
batch_cnt = batch.count;
if (pmd_perf_metrics_enabled(pmd)) {
/* Update batch histogram. */
s->current.batches++;
histogram_add_sample(&s->pkts_per_batch, batch_cnt);
/* Update the maximum vhost rx queue fill level. */
if (rxq->is_vhost && rem_qlen >= 0) {
uint32_t qfill = batch_cnt + rem_qlen;
if (qfill > s->current.max_vhost_qfill) {
s->current.max_vhost_qfill = qfill;
}
}
}
/* Process packet batch. */
dp_netdev_input(pmd, &batch, port_no);
/* Assign processing cycles to rx queue. */
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
dp_netdev_pmd_flush_output_packets(pmd, false);
} else {
/* Discard cycles. */
cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -3304,7 +3452,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
pmd->ctx.last_rxq = NULL;
return batch_cnt;
}
static struct tx_port *
@@ -3360,6 +3508,7 @@ port_reconfigure(struct dp_netdev_port *port)
}
port->rxqs[i].port = port;
port->rxqs[i].is_vhost = !strncmp(port->type, "dpdkvhost", 9);
err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
if (err) {
@@ -4138,23 +4287,26 @@ reload:
pmd->intrvl_tsc_prev = 0;
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
cycles_counter_update(s);
/* Protect pmd stats from external clearing while polling. */
ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
for (;;) {
uint64_t iter_packets = 0;
uint64_t rx_packets = 0, tx_packets = 0;
pmd_perf_start_iteration(s);
for (i = 0; i < poll_cnt; i++) {
process_packets =
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
poll_list[i].port_no);
iter_packets += process_packets;
rx_packets += process_packets;
}
if (!rx_packets) {
/* We didn't receive anything in the process loop.
* Check if we need to send something.
* There was no time updates on current iteration. */
pmd_thread_ctx_time_update(pmd);
tx_packets = dp_netdev_pmd_flush_output_packets(pmd, false);
}
if (lc++ > 1024) {
@@ -4173,8 +4325,10 @@ reload:
break;
}
}
pmd_perf_end_iteration(s, rx_packets, tx_packets,
pmd_perf_metrics_enabled(pmd));
}
ovs_mutex_unlock(&pmd->perf_stats.stats_mutex);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
exiting = latch_is_set(&pmd->exit_latch);
@@ -5069,6 +5223,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
struct match match;
ovs_u128 ufid;
int error;
uint64_t cycles = cycles_counter_update(&pmd->perf_stats);
match.tun_md.valid = false;
miniflow_expand(&key->mf, &match.flow);
@@ -5122,6 +5277,14 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
ovs_mutex_unlock(&pmd->flow_mutex);
emc_probabilistic_insert(pmd, key, netdev_flow);
}
if (pmd_perf_metrics_enabled(pmd)) {
/* Update upcall stats. */
cycles = cycles_counter_update(&pmd->perf_stats) - cycles;
struct pmd_perf_stats *s = &pmd->perf_stats;
s->current.upcalls++;
s->current.upcall_cycles += cycles;
histogram_add_sample(&s->cycles_per_upcall, cycles);
}
return error;
}

manpages.mk

@@ -252,6 +252,7 @@ vswitchd/ovs-vswitchd.8: \
lib/coverage-unixctl.man \
lib/daemon.man \
lib/dpctl.man \
lib/dpif-netdev-unixctl.man \
lib/memory-unixctl.man \
lib/netdev-dpdk-unixctl.man \
lib/service.man \
@@ -269,6 +270,7 @@ lib/common.man:
lib/coverage-unixctl.man:
lib/daemon.man:
lib/dpctl.man:
lib/dpif-netdev-unixctl.man:
lib/memory-unixctl.man:
lib/netdev-dpdk-unixctl.man:
lib/service.man:

vswitchd/ovs-vswitchd.8.in

@@ -264,32 +264,7 @@ type).
..
.so lib/dpctl.man
.
.SS "DPIF-NETDEV COMMANDS"
These commands are used to expose internal information (mostly statistics)
about the ``dpif-netdev'' userspace datapath. If there is only one datapath
(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
argument can be omitted.
.IP "\fBdpif-netdev/pmd-stats-show\fR [\fIdp\fR]"
Shows performance statistics for each pmd thread of the datapath \fIdp\fR.
The special thread ``main'' sums up the statistics of every non pmd thread.
The sum of ``emc hits'', ``masked hits'' and ``miss'' is the number of
packets received by the datapath. Cycles are counted using the TSC or similar
facilities (when available on the platform). To reset these counters use
\fBdpif-netdev/pmd-stats-clear\fR. The duration of one cycle depends on the
measuring infrastructure. ``idle cycles'' refers to cycles spent polling
devices but not receiving any packets. ``processing cycles'' refers to cycles
spent polling devices and successfully receiving packets, plus the cycles
spent processing said packets.
.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
Resets to zero the per pmd thread performance numbers shown by the
\fBdpif-netdev/pmd-stats-show\fR command. It will NOT reset datapath or
bridge statistics, only the values shown by the above command.
.IP "\fBdpif-netdev/pmd-rxq-show\fR [\fIdp\fR]"
For each pmd thread of the datapath \fIdp\fR shows list of queue-ids with
port names, which this thread polls.
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
.
.so lib/dpif-netdev-unixctl.man
.so lib/netdev-dpdk-unixctl.man
.so ofproto/ofproto-dpif-unixctl.man
.so ofproto/ofproto-unixctl.man

vswitchd/vswitch.xml

@@ -375,6 +375,18 @@
</p>
</column>
<column name="other_config" key="pmd-perf-metrics"
type='{"type": "boolean"}'>
<p>
Enables recording of detailed PMD performance metrics for analysis
and trouble-shooting. This can have a performance impact in the
order of 1%.
</p>
<p>
Defaults to false but can be changed at any time.
</p>
</column>
<column name="other_config" key="n-handler-threads"
type='{"type": "integer", "minInteger": 1}'>
<p>