/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
 * Copyright (c) 2019, 2020, 2021 Intel Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef DPIF_NETDEV_PRIVATE_THREAD_H
#define DPIF_NETDEV_PRIVATE_THREAD_H 1

#include "dpif.h"
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private-dfc.h"

#include <stdbool.h>
#include <stdint.h>

#include "cmap.h"
#include "openvswitch/thread.h"

#ifdef __cplusplus
extern "C" {
#endif

/* PMD Thread Structures */

/* A set of properties for the current processing loop that is not directly
 * associated with the pmd thread itself, but with the packets being
 * processed or the short-term system configuration (for example, time).
 * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */
struct dp_netdev_pmd_thread_ctx {
    /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
    long long now;
    /* RX queue from which last packet was received. */
    struct dp_netdev_rxq *last_rxq;
    /* EMC insertion probability context for the current processing cycle. */
    uint32_t emc_insert_min;
};
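
/* Illustrative sketch (not part of this header, assuming the time_usec()
 * helper from lib/timeval.h): the PMD main loop is expected to refresh
 * 'ctx.now' once per iteration instead of querying the clock per packet:
 *
 *     static inline void
 *     pmd_thread_ctx_time_update(struct dp_netdev_pmd_thread *pmd)
 *     {
 *         pmd->ctx.now = time_usec();
 *     }
 *
 * 'emc_insert_min' is likewise sampled once per cycle so that exact-match
 * cache insertion can be made probabilistic, e.g. by inserting only when a
 * random draw falls at or below it. */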

/* Forward declaration for typedef. */
struct dp_netdev_pmd_thread;

typedef void (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
                                     struct dp_packet_batch *packets,
                                     odp_port_t port_no);
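
/* Illustrative sketch (not part of this header): the input function is held
 * in the ATOMIC() 'netdev_input_func' member of the pmd thread declared
 * below, so an implementation can be swapped at runtime and called through
 * relaxed atomics:
 *
 *     dp_netdev_input_func input_func;
 *
 *     atomic_read_relaxed(&pmd->netdev_input_func, &input_func);
 *     input_func(pmd, &packets, port_no);
 */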

/* PMD: Poll mode drivers.  A PMD accesses devices via polling to eliminate
 * the performance overhead of interrupt processing.  Therefore, netdev
 * cannot implement rx-wait for these devices; dpif-netdev needs to poll
 * these devices to check their receive buffers, and each pmd thread polls
 * the devices assigned to it.
 *
 * DPDK uses PMDs to access NICs.
 *
 * Note, the instance with cpu core id NON_PMD_CORE_ID is reserved for
 * I/O of all non-pmd threads.  No actual thread is created for that
 * instance.
 *
 * Each struct has its own flow cache and classifier per managed ingress
 * port.  For packets received on an ingress port, a lookup is done in the
 * corresponding PMD thread's flow cache and, in case of a miss, in the
 * port's classifier.  Packets are executed with the found actions in either
 * case, as sketched below. */
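
/* Illustrative per-packet fast path implied by the comment above.  The
 * helper names here are hypothetical, for orientation only:
 *
 *     flow = flow_cache_lookup(&pmd->flow_cache, key);   // EMC/SMC first.
 *     if (!flow) {
 *         cls = pmd_find_classifier(pmd, in_port);       // Per-port dpcls.
 *         flow = classifier_lookup(cls, key);            // Megaflow lookup.
 *     }
 *     execute_actions(pmd, packets, flow);
 */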

struct dp_netdev_pmd_thread {
    struct dp_netdev *dp;
    struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
    struct cmap_node node;          /* In 'dp->poll_threads'. */

    /* Per thread exact-match cache.  Note, the instance for cpu core
     * NON_PMD_CORE_ID can be accessed by multiple threads, and thus needs
     * to be protected by 'non_pmd_mutex'.  Every other instance
     * will only be accessed by its own pmd thread. */
    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache;

    /* Flow-Table and classifiers
     *
     * Writers of 'flow_table' must take the 'flow_mutex'.  Corresponding
     * changes to 'classifiers' must be made while still holding the
     * 'flow_mutex'.
     */
    struct ovs_mutex flow_mutex;
    struct cmap flow_table OVS_GUARDED; /* Flow table. */

    /* One classifier per in_port polled by the pmd. */
    struct cmap classifiers;
    /* Periodically sort subtable vectors according to hit frequencies. */
    long long int next_optimization;
    /* End of the next time interval for which processing cycles
       are stored for each polled rxq. */
    long long int rxq_next_cycle_store;

    /* Last interval timestamp. */
    uint64_t intrvl_tsc_prev;
    /* Last interval cycles. */
    atomic_ullong intrvl_cycles;

    /* Current context of the PMD thread. */
    struct dp_netdev_pmd_thread_ctx ctx;

    /* Function pointer to call for dp_netdev_input() functionality. */
    ATOMIC(dp_netdev_input_func) netdev_input_func;

    struct seq *reload_seq;
    uint64_t last_reload_seq;

    /* These are atomic variables used as synchronization and configuration
     * points for thread reload/exit.
     *
     * The 'reload' atomic is the main one and it's used as a memory
     * synchronization point for all other knobs and data.
     *
     * For a thread that requests a PMD reload:
     *
     * * All changes that should be visible to the PMD thread must be made
     *   before setting the 'reload'.  These changes could use any memory
     *   ordering model, including 'relaxed'.
     * * Setting the 'reload' atomic should occur in the same thread where
     *   all the other PMD configuration options are updated.
     * * Setting the 'reload' atomic should be done with 'release' memory
     *   ordering model or stricter.  This will guarantee that all previous
     *   changes (including non-atomic and 'relaxed') will be visible to
     *   the PMD thread.
     * * To check that the reload is done, the requesting thread should poll
     *   the 'reload' atomic until it becomes 'false'.  Polling should be
     *   done with 'acquire' memory ordering model or stricter.  This
     *   ensures that the PMD thread has completed the reload process.
     *
     * For the PMD thread:
     *
     * * The PMD thread should read the 'reload' atomic with 'acquire'
     *   memory ordering model or stricter.  This will guarantee that all
     *   changes made before setting the 'reload' in the requesting thread
     *   will be visible to the PMD thread.
     * * All other configuration data could be read with any memory
     *   ordering model (including non-atomic and 'relaxed'), but *only
     *   after* reading the 'reload' atomic set to 'true'.
     * * When the PMD reload is done, the PMD should (optionally) set all
     *   the below knobs except the 'reload' to their default ('false')
     *   values and must, as the last step, set the 'reload' to 'false'
     *   using 'release' memory ordering model or stricter.  This will
     *   inform the requesting thread that the PMD has completed a reload
     *   cycle.
     *
     * An illustrative sketch of this protocol follows the flag declarations
     * below. */
    atomic_bool reload;             /* Do we need to reload ports? */
    atomic_bool wait_for_reload;    /* Can we busy wait for the next reload? */
    atomic_bool reload_tx_qid;      /* Do we need to reload static_tx_qid? */
    atomic_bool exit;               /* For terminating the pmd thread. */
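
    /* Illustrative sketch (not part of this header) of the reload protocol
     * described above, using the OVS atomic_store_explicit() and
     * atomic_read_explicit() wrappers:
     *
     *     // Requesting thread: publish config, then request the reload.
     *     atomic_store_relaxed(&pmd->wait_for_reload, false);
     *     atomic_store_explicit(&pmd->reload, true, memory_order_release);
     *
     *     // Requesting thread: wait until the PMD acknowledges.
     *     bool reload;
     *     do {
     *         atomic_read_explicit(&pmd->reload, &reload,
     *                              memory_order_acquire);
     *     } while (reload);
     *
     *     // PMD thread: detect the request, reconfigure, acknowledge.
     *     atomic_read_explicit(&pmd->reload, &reload, memory_order_acquire);
     *     if (reload) {
     *         ...apply the new configuration...
     *         atomic_store_explicit(&pmd->reload, false,
     *                               memory_order_release);
     *     }
     */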

    pthread_t thread;
    unsigned core_id;               /* CPU core id of this pmd thread. */
    int numa_id;                    /* numa node id of this pmd thread. */
    bool isolated;

    /* Queue id used by this pmd thread to send packets on all netdevs if
     * XPS is disabled for that netdev.  All static_tx_qid's are unique and
     * less than 'cmap_count(dp->poll_threads)'. */
    uint32_t static_tx_qid;

    /* Number of filled output batches. */
    int n_output_batches;

    struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
    /* List of rx queues to poll. */
    struct hmap poll_list OVS_GUARDED;
    /* Map of 'tx_port's used for transmission.  Written by the main thread,
     * read by the pmd thread. */
    struct hmap tx_ports OVS_GUARDED;

    struct ovs_mutex bond_mutex;    /* Protects updates of 'tx_bonds'. */
    /* Map of 'tx_bond's used for transmission.  Written by the main thread
     * and read by the pmd thread. */
    struct cmap tx_bonds;

    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
     * ports (that support push_tunnel/pop_tunnel), the other contains ports
     * with at least one txq (that support send).  A port can be in both.
     *
     * There are two separate maps to make sure that we don't try to execute
     * OUTPUT on a device which has 0 txqs or PUSH/POP on a non-tunnel
     * device.
     *
     * The instances for cpu core NON_PMD_CORE_ID can be accessed by multiple
     * threads, and thus need to be protected by 'non_pmd_mutex'.  Every
     * other instance will only be accessed by its own pmd thread. */
    struct hmap tnl_port_cache;
    struct hmap send_port_cache;

    /* Keep track of detailed PMD performance statistics. */
    struct pmd_perf_stats perf_stats;

    /* Stats from previous iteration used by automatic pmd
     * load balance logic. */
    uint64_t prev_stats[PMD_N_STATS];
    atomic_count pmd_overloaded;

    /* Set to true if the pmd thread needs to be reloaded. */
    bool need_reload;

    /* Next time when PMD should try RCU quiescing. */
    long long next_rcu_quiesce;
};

#ifdef __cplusplus
}
#endif

#endif /* dpif-netdev-private-thread.h */