ovs/lib/dpif-netdev-private-thread.h
Kevin Traynor 4cbbf56e6c dpif-netdev: Add per PMD sleep config.
Extend 'pmd-sleep-max' so that individual PMD thread cores may have a
specified max sleep request value.

Existing behaviour is maintained.

Any PMD thread core without a value will use the global value, if set,
or default to no sleep.

To set PMD thread cores 8 and 9 to never request a load based sleep
and all other PMD thread cores to be able to request a max sleep of
50 usecs:

 $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0

To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs
and all other PMD thread cores to never request a sleep:

 $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100

'pmd-sleep-show' is updated to show the max sleep value for each PMD
thread.
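
For example:

 $ ovs-appctl dpif-netdev/pmd-sleep-show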

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-12-16 01:07:59 +01:00


/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
 * Copyright (c) 2019, 2020, 2021 Intel Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef DPIF_NETDEV_PRIVATE_THREAD_H
#define DPIF_NETDEV_PRIVATE_THREAD_H 1

#include <stdbool.h>
#include <stdint.h>

#include "ccmap.h"
#include "cmap.h"
#include "dpif.h"
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private-dfc.h"
#include "dpif-netdev-private-dpif.h"
#include "dpif-netdev-private-extract.h"
#include "openvswitch/thread.h"

#ifdef __cplusplus
extern "C" {
#endif
/* PMD Thread Structures */
/* A set of properties for the current processing loop that is not directly
 * associated with the pmd thread itself, but with the packets being
 * processed or the short-term system configuration (for example, time).
 * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */
struct dp_netdev_pmd_thread_ctx {
    /* Latest measured time.  See 'pmd_thread_ctx_time_update()'. */
    long long now;
    /* RX queue from which last packet was received. */
    struct dp_netdev_rxq *last_rxq;
    /* EMC insertion probability context for the current processing cycle. */
    uint32_t emc_insert_min;
    /* Enable the SMC cache from ovsdb config. */
    bool smc_enable_db;
};
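
/* Illustrative sketch (not part of this header): a PMD iteration typically
 * refreshes 'ctx.now' once and reuses it for the whole batch of packets.
 * In dpif-netdev.c, 'pmd_thread_ctx_time_update()' amounts to:
 *
 *     pmd->ctx.now = time_usec();
 */
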
/* PMD: Poll mode drivers.  A PMD accesses devices via polling to eliminate
 * the performance overhead of interrupt processing.  Therefore the netdev
 * layer cannot implement rx-wait for these devices.  dpif-netdev needs to
 * poll these devices to check their receive buffers, and the pmd thread
 * polls the devices assigned to it.
 *
 * DPDK uses PMDs to access NICs.
 *
 * Note, the instance with cpu core id NON_PMD_CORE_ID is reserved for
 * I/O of all non-pmd threads.  No actual thread is created for that
 * instance.
 *
 * Each struct has its own flow cache and classifier per managed ingress
 * port.  For packets received on an ingress port, a lookup is done in the
 * corresponding PMD thread's flow cache and, in case of a miss, in the
 * corresponding classifier of that port.  Packets are executed with the
 * found actions in either case. */
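
/* Illustrative lookup order for the hierarchy described above (a sketch
 * only; the helper names follow the dpif-netdev internals and their exact
 * signatures are not declared in this header):
 *
 *     flow = emc_lookup(...);              exact match cache
 *     if (!flow and smc is enabled) {
 *         flow = smc_lookup_single(...);   signature match cache
 *     }
 *     if (!flow) {
 *         dpcls_lookup(...);               per in_port classifier
 *     }
 */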
struct dp_netdev_pmd_thread {
    struct dp_netdev *dp;
    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
    struct cmap_node node;          /* In 'dp->poll_threads'. */

    /* Per thread exact match cache and signature match cache.  Note, the
     * instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
     * threads, and thus needs to be protected by 'non_pmd_mutex'.  Every
     * other instance will only be accessed by its own pmd thread. */
    struct dfc_cache flow_cache;
    /* Flow-Table and classifiers.
     *
     * Writers of 'flow_table'/'simple_match_table' and their n* ccmap's must
     * take the 'flow_mutex'.  Corresponding changes to 'classifiers' must be
     * made while still holding the 'flow_mutex'. */
    struct ovs_mutex flow_mutex;
    struct cmap flow_table OVS_GUARDED;         /* Flow table. */
    struct cmap simple_match_table OVS_GUARDED; /* Flow table with simple
                                                 * match flows only. */

    /* Number of flows in the 'flow_table' per in_port. */
    struct ccmap n_flows OVS_GUARDED;
    /* Number of flows in the 'simple_match_table' per in_port. */
    struct ccmap n_simple_flows OVS_GUARDED;

    /* One classifier per in_port polled by the pmd. */
    struct cmap classifiers;
    /* Periodically sort subtable vectors according to hit frequencies. */
    long long int next_optimization;
    /* End of the next time interval for which processing cycles
     * are stored for each polled rxq. */
    long long int next_cycle_store;

    /* Last interval timestamp. */
    uint64_t intrvl_tsc_prev;
    /* Last interval cycles. */
    atomic_ullong intrvl_cycles;
    /* Write index for 'busy_cycles_intrvl'. */
    atomic_count intrvl_idx;
    /* Busy cycles in last PMD_INTERVAL_MAX intervals. */
    atomic_ullong *busy_cycles_intrvl;

    /* Current context of the PMD thread. */
    struct dp_netdev_pmd_thread_ctx ctx;

    /* Function pointer to call for dp_netdev_input() functionality. */
    ATOMIC(dp_netdev_input_func) netdev_input_func;
    /* Pointer for per-DPIF implementation scratch space. */
    void *netdev_input_func_userdata;

    /* Function pointer to call for miniflow_extract() functionality. */
    ATOMIC(miniflow_extract_func) miniflow_extract_opt;

    struct seq *reload_seq;
    uint64_t last_reload_seq;
    /* These are atomic variables used as synchronization and configuration
     * points for thread reload/exit.
     *
     * The 'reload' atomic is the main one and is used as a memory
     * synchronization point for all other knobs and data.
     *
     * For a thread that requests a PMD reload:
     *
     * * All changes that should be visible to the PMD thread must be made
     *   before setting 'reload'.  These changes may use any memory ordering
     *   model, including 'relaxed'.
     * * Setting the 'reload' atomic should occur in the same thread where
     *   all other PMD configuration options are updated.
     * * Setting the 'reload' atomic should be done with 'release' memory
     *   ordering model or stricter.  This guarantees that all previous
     *   changes (including non-atomic and 'relaxed' ones) will be visible
     *   to the PMD thread.
     * * To check that the reload is done, the thread should poll the
     *   'reload' atomic until it becomes 'false'.  Polling should be done
     *   with 'acquire' memory ordering model or stricter.  This ensures
     *   that the PMD thread has completed the reload process.
     *
     * For the PMD thread:
     *
     * * The PMD thread should read the 'reload' atomic with 'acquire'
     *   memory ordering model or stricter.  This guarantees that all
     *   changes made before setting 'reload' in the requesting thread will
     *   be visible to the PMD thread.
     * * All other configuration data may be read with any memory ordering
     *   model (including non-atomic and 'relaxed'), but *only after*
     *   reading the 'reload' atomic set to 'true'.
     * * When the PMD reload is done, the PMD should (optionally) set all
     *   the below knobs except 'reload' to their default ('false') values
     *   and, mandatorily as the last step, set 'reload' to 'false' using
     *   'release' memory ordering model or stricter.  This informs the
     *   requesting thread that the PMD has completed a reload cycle.
     *
     * A sketch of both sides is given after the declarations below. */
    atomic_bool reload;             /* Do we need to reload ports? */
    atomic_bool wait_for_reload;    /* Can we busy wait for the next reload? */
    atomic_bool reload_tx_qid;      /* Do we need to reload static_tx_qid? */
    atomic_bool exit;               /* For terminating the pmd thread. */
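
    /* Illustrative sketch of the protocol above (not part of this header;
     * 'atomic_store_explicit()', 'atomic_read_explicit()' and
     * 'atomic_store_relaxed()' are the ovs-atomic.h helpers).
     *
     * Requesting side:
     *
     *     ...update pmd configuration, any memory ordering...
     *     atomic_store_explicit(&pmd->reload, true, memory_order_release);
     *
     *     bool reloading;
     *     do {
     *         atomic_read_explicit(&pmd->reload, &reloading,
     *                              memory_order_acquire);
     *     } while (reloading);
     *
     * PMD side, once the reload is handled:
     *
     *     atomic_store_relaxed(&pmd->wait_for_reload, false);
     *     atomic_store_relaxed(&pmd->reload_tx_qid, false);
     *     atomic_store_explicit(&pmd->reload, false, memory_order_release);
     */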

    pthread_t thread;
    unsigned core_id;               /* CPU core id of this pmd thread. */
    int numa_id;                    /* numa node id of this pmd thread. */
    bool isolated;

    /* Max sleep request in microseconds. */
    atomic_uint64_t max_sleep;
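    /* Illustrative (sketch only): the PMD main loop would read this
     * atomically each iteration before deciding whether to request a
     * load based sleep, e.g.:
     *
     *     uint64_t max_sleep;
     *     atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
     */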

    /* Queue id used by this pmd thread to send packets on all netdevs if
     * XPS is disabled for this netdev.  All static_tx_qid's are unique and
     * less than 'cmap_count(dp->poll_threads)'. */
    uint32_t static_tx_qid;

    /* Number of filled output batches. */
    int n_output_batches;

    struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
    /* List of rx queues to poll. */
    struct hmap poll_list OVS_GUARDED;
    /* Map of 'tx_port's used for transmission.  Written by the main thread,
     * read by the pmd thread. */
    struct hmap tx_ports OVS_GUARDED;

    struct ovs_mutex bond_mutex;    /* Protects updates of 'tx_bonds'. */
    /* Map of 'tx_bond's used for transmission.  Written by the main thread
     * and read by the pmd thread. */
    struct cmap tx_bonds;

    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
     * ports (that support push_tunnel/pop_tunnel), the other contains ports
     * with at least one txq (that support send).  A port can be in both.
     *
     * There are two separate maps to make sure that we don't try to execute
     * OUTPUT on a device which has 0 txqs, or PUSH/POP on a non-tunnel
     * device.
     *
     * The instances for cpu core NON_PMD_CORE_ID can be accessed by multiple
     * threads, and thus need to be protected by 'non_pmd_mutex'.  Every
     * other instance will only be accessed by its own pmd thread. */
    struct hmap tnl_port_cache;
    struct hmap send_port_cache;

    /* Keep track of detailed PMD performance statistics. */
    struct pmd_perf_stats perf_stats;

    /* Stats from the previous iteration, used by the automatic pmd load
     * balance logic. */
    uint64_t prev_stats[PMD_N_STATS];
    atomic_count pmd_overloaded;

    /* Set to true if the pmd thread needs to be reloaded. */
    bool need_reload;

    /* Next time when the PMD should try RCU quiescing. */
    long long next_rcu_quiesce;
};
#ifdef __cplusplus
}
#endif
#endif /* dpif-netdev-private-thread.h */