mirror of
https://github.com/openvswitch/ovs
synced 2025-08-29 13:27:59 +00:00
dpif-netdev: Time based output batching.
This allows collecting packets from more than one RX burst and sending them together at configurable intervals. 'other_config:tx-flush-interval' can be used to configure the time that a packet can wait in an output batch before sending. 'tx-flush-interval' has microsecond resolution. Tested-by: Jan Scheurich <jan.scheurich@ericsson.com> Acked-by: Jan Scheurich <jan.scheurich@ericsson.com> Signed-off-by: Ilya Maximets <i.maximets@samsung.com> Signed-off-by: Ian Stokes <ian.stokes@intel.com>
This commit is contained in:
parent
58ed6df048
commit
c71ea3c4a7
@ -83,6 +83,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
|
||||
#define MAX_RECIRC_DEPTH 6
|
||||
DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
|
||||
|
||||
/* Use instant packet send by default. */
|
||||
#define DEFAULT_TX_FLUSH_INTERVAL 0
|
||||
|
||||
/* Configuration parameters. */
|
||||
enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
|
||||
enum { MAX_METERS = 65536 }; /* Maximum number of meters. */
|
||||
@ -269,6 +272,9 @@ struct dp_netdev {
|
||||
struct hmap ports;
|
||||
struct seq *port_seq; /* Incremented whenever a port changes. */
|
||||
|
||||
/* The time that a packet can wait in output batch for sending. */
|
||||
atomic_uint32_t tx_flush_interval;
|
||||
|
||||
/* Meters. */
|
||||
struct ovs_mutex meter_locks[N_METER_LOCKS];
|
||||
struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
|
||||
@ -496,6 +502,7 @@ struct tx_port {
|
||||
int qid;
|
||||
long long last_used;
|
||||
struct hmap_node node;
|
||||
long long flush_time;
|
||||
struct dp_packet_batch output_pkts;
|
||||
struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
|
||||
};
|
||||
@ -577,6 +584,9 @@ struct dp_netdev_pmd_thread {
|
||||
* than 'cmap_count(dp->poll_threads)'. */
|
||||
uint32_t static_tx_qid;
|
||||
|
||||
/* Number of filled output batches. */
|
||||
int n_output_batches;
|
||||
|
||||
struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */
|
||||
/* List of rx queues to poll. */
|
||||
struct hmap poll_list OVS_GUARDED;
|
||||
@ -666,8 +676,9 @@ static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
|
||||
static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
|
||||
struct rxq_poll *poll)
|
||||
OVS_REQUIRES(pmd->port_mutex);
|
||||
static void
|
||||
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd);
|
||||
static int
|
||||
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
|
||||
bool force);
|
||||
|
||||
static void reconfigure_datapath(struct dp_netdev *dp)
|
||||
OVS_REQUIRES(dp->port_mutex);
|
||||
@ -1241,6 +1252,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
|
||||
conntrack_init(&dp->conntrack);
|
||||
|
||||
atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
|
||||
atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
|
||||
|
||||
cmap_init(&dp->poll_threads);
|
||||
|
||||
@ -2907,7 +2919,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
|
||||
dp_packet_batch_init_packet(&pp, execute->packet);
|
||||
dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
|
||||
execute->actions, execute->actions_len);
|
||||
dp_netdev_pmd_flush_output_packets(pmd);
|
||||
dp_netdev_pmd_flush_output_packets(pmd, true);
|
||||
|
||||
if (pmd->core_id == NON_PMD_CORE_ID) {
|
||||
ovs_mutex_unlock(&dp->non_pmd_mutex);
|
||||
@ -2956,6 +2968,16 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
|
||||
smap_get_ullong(other_config, "emc-insert-inv-prob",
|
||||
DEFAULT_EM_FLOW_INSERT_INV_PROB);
|
||||
uint32_t insert_min, cur_min;
|
||||
uint32_t tx_flush_interval, cur_tx_flush_interval;
|
||||
|
||||
tx_flush_interval = smap_get_int(other_config, "tx-flush-interval",
|
||||
DEFAULT_TX_FLUSH_INTERVAL);
|
||||
atomic_read_relaxed(&dp->tx_flush_interval, &cur_tx_flush_interval);
|
||||
if (tx_flush_interval != cur_tx_flush_interval) {
|
||||
atomic_store_relaxed(&dp->tx_flush_interval, tx_flush_interval);
|
||||
VLOG_INFO("Flushing interval for tx queues set to %"PRIu32" us",
|
||||
tx_flush_interval);
|
||||
}
|
||||
|
||||
if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) {
|
||||
free(dp->pmd_cmask);
|
||||
@ -3150,7 +3172,7 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
|
||||
return processing_cycles;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
||||
struct tx_port *p)
|
||||
{
|
||||
@ -3160,6 +3182,7 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
||||
bool dynamic_txqs;
|
||||
struct cycle_timer timer;
|
||||
uint64_t cycles;
|
||||
uint32_t tx_flush_interval;
|
||||
|
||||
cycle_timer_start(&pmd->perf_stats, &timer);
|
||||
|
||||
@ -3176,6 +3199,13 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
||||
netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
|
||||
dp_packet_batch_init(&p->output_pkts);
|
||||
|
||||
/* Update time of the next flush. */
|
||||
atomic_read_relaxed(&pmd->dp->tx_flush_interval, &tx_flush_interval);
|
||||
p->flush_time = pmd->ctx.now + tx_flush_interval;
|
||||
|
||||
ovs_assert(pmd->n_output_batches > 0);
|
||||
pmd->n_output_batches--;
|
||||
|
||||
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS, output_cnt);
|
||||
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
|
||||
|
||||
@ -3188,18 +3218,28 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
|
||||
RXQ_CYCLES_PROC_CURR, cycles);
|
||||
}
|
||||
}
|
||||
|
||||
return output_cnt;
|
||||
}
|
||||
|
||||
static void
|
||||
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
|
||||
static int
|
||||
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
|
||||
bool force)
|
||||
{
|
||||
struct tx_port *p;
|
||||
int output_cnt = 0;
|
||||
|
||||
if (!pmd->n_output_batches) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
|
||||
if (!dp_packet_batch_is_empty(&p->output_pkts)) {
|
||||
dp_netdev_pmd_flush_output_on_port(pmd, p);
|
||||
if (!dp_packet_batch_is_empty(&p->output_pkts)
|
||||
&& (force || pmd->ctx.now >= p->flush_time)) {
|
||||
output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p);
|
||||
}
|
||||
}
|
||||
return output_cnt;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -3210,7 +3250,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
||||
struct dp_packet_batch batch;
|
||||
struct cycle_timer timer;
|
||||
int error;
|
||||
int batch_cnt = 0;
|
||||
int batch_cnt = 0, output_cnt = 0;
|
||||
uint64_t cycles;
|
||||
|
||||
/* Measure duration for polling and processing rx burst. */
|
||||
@ -3232,7 +3272,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
||||
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
|
||||
dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
|
||||
|
||||
dp_netdev_pmd_flush_output_packets(pmd);
|
||||
output_cnt = dp_netdev_pmd_flush_output_packets(pmd, false);
|
||||
} else {
|
||||
/* Discard cycles. */
|
||||
cycle_timer_stop(&pmd->perf_stats, &timer);
|
||||
@ -3246,7 +3286,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
|
||||
|
||||
pmd->ctx.last_rxq = NULL;
|
||||
|
||||
return batch_cnt;
|
||||
return batch_cnt + output_cnt;
|
||||
}
|
||||
|
||||
static struct tx_port *
|
||||
@ -3869,6 +3909,7 @@ dpif_netdev_run(struct dpif *dpif)
|
||||
struct dp_netdev *dp = get_dp_netdev(dpif);
|
||||
struct dp_netdev_pmd_thread *non_pmd;
|
||||
uint64_t new_tnl_seq;
|
||||
bool need_to_flush = true;
|
||||
|
||||
ovs_mutex_lock(&dp->port_mutex);
|
||||
non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
|
||||
@ -3879,13 +3920,22 @@ dpif_netdev_run(struct dpif *dpif)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < port->n_rxq; i++) {
|
||||
dp_netdev_process_rxq_port(non_pmd,
|
||||
&port->rxqs[i],
|
||||
port->port_no);
|
||||
if (dp_netdev_process_rxq_port(non_pmd,
|
||||
&port->rxqs[i],
|
||||
port->port_no)) {
|
||||
need_to_flush = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pmd_thread_ctx_time_update(non_pmd);
|
||||
if (need_to_flush) {
|
||||
/* We didn't receive anything in the process loop.
|
||||
* Check if we need to send something.
|
||||
* There were no time updates on the current iteration. */
|
||||
pmd_thread_ctx_time_update(non_pmd);
|
||||
dp_netdev_pmd_flush_output_packets(non_pmd, false);
|
||||
}
|
||||
|
||||
dpif_netdev_xps_revalidate_pmd(non_pmd, false);
|
||||
ovs_mutex_unlock(&dp->non_pmd_mutex);
|
||||
|
||||
@ -3936,6 +3986,8 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
|
||||
{
|
||||
struct tx_port *tx_port_cached;
|
||||
|
||||
/* Flush all the queued packets. */
|
||||
dp_netdev_pmd_flush_output_packets(pmd, true);
|
||||
/* Free all used tx queue ids. */
|
||||
dpif_netdev_xps_revalidate_pmd(pmd, true);
|
||||
|
||||
@ -4066,6 +4118,7 @@ reload:
|
||||
cycles_counter_update(s);
|
||||
for (;;) {
|
||||
uint64_t iter_packets = 0;
|
||||
|
||||
pmd_perf_start_iteration(s);
|
||||
for (i = 0; i < poll_cnt; i++) {
|
||||
process_packets =
|
||||
@ -4074,15 +4127,20 @@ reload:
|
||||
iter_packets += process_packets;
|
||||
}
|
||||
|
||||
if (!iter_packets) {
|
||||
/* We didn't receive anything in the process loop.
|
||||
* Check if we need to send something.
|
||||
* There were no time updates on the current iteration. */
|
||||
pmd_thread_ctx_time_update(pmd);
|
||||
iter_packets += dp_netdev_pmd_flush_output_packets(pmd, false);
|
||||
}
|
||||
|
||||
if (lc++ > 1024) {
|
||||
bool reload;
|
||||
|
||||
lc = 0;
|
||||
|
||||
coverage_try_clear();
|
||||
/* It's possible that the time was not updated on current
|
||||
* iteration, if there were no received packets. */
|
||||
pmd_thread_ctx_time_update(pmd);
|
||||
dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
|
||||
if (!ovsrcu_try_quiesce()) {
|
||||
emc_cache_slow_sweep(&pmd->flow_cache);
|
||||
@ -4521,6 +4579,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
|
||||
pmd->core_id = core_id;
|
||||
pmd->numa_id = numa_id;
|
||||
pmd->need_reload = false;
|
||||
pmd->n_output_batches = 0;
|
||||
|
||||
ovs_refcount_init(&pmd->ref_cnt);
|
||||
latch_init(&pmd->exit_latch);
|
||||
@ -4710,6 +4769,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
|
||||
|
||||
tx->port = port;
|
||||
tx->qid = -1;
|
||||
tx->flush_time = 0LL;
|
||||
dp_packet_batch_init(&tx->output_pkts);
|
||||
|
||||
hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
|
||||
@ -5404,12 +5464,16 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
|
||||
dp_netdev_pmd_flush_output_on_port(pmd, p);
|
||||
}
|
||||
#endif
|
||||
if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
|
||||
+ dp_packet_batch_size(packets_) > NETDEV_MAX_BURST)) {
|
||||
/* Some packets were generated during input batch processing.
|
||||
* Flush here to avoid overflow. */
|
||||
if (dp_packet_batch_size(&p->output_pkts)
|
||||
+ dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
|
||||
/* Flush here to avoid overflow. */
|
||||
dp_netdev_pmd_flush_output_on_port(pmd, p);
|
||||
}
|
||||
|
||||
if (dp_packet_batch_is_empty(&p->output_pkts)) {
|
||||
pmd->n_output_batches++;
|
||||
}
|
||||
|
||||
DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
|
||||
p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
|
||||
pmd->ctx.last_rxq;
|
||||
|
@ -359,6 +359,22 @@
|
||||
</p>
|
||||
</column>
|
||||
|
||||
<column name="other_config" key="tx-flush-interval"
|
||||
type='{"type": "integer",
|
||||
"minInteger": 0, "maxInteger": 1000000}'>
|
||||
<p>
|
||||
Specifies the time in microseconds that a packet can wait in output
|
||||
batch for sending, i.e., the amount of time that a packet can spend in an
|
||||
intermediate output queue before sending to netdev.
|
||||
This option can be used to configure balance between throughput
|
||||
and latency. Lower values decrease latency while higher values
|
||||
may be useful to achieve higher performance.
|
||||
</p>
|
||||
<p>
|
||||
Defaults to 0 i.e. instant packet sending (latency optimized).
|
||||
</p>
|
||||
</column>
|
||||
|
||||
<column name="other_config" key="n-handler-threads"
|
||||
type='{"type": "integer", "minInteger": 1}'>
|
||||
<p>
|
||||
|
Loading…
x
Reference in New Issue
Block a user