2016-01-06 15:44:39 -08:00
|
|
|
|
/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
|
2013-06-25 14:45:43 -07:00
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License. */
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
#include "ofproto-dpif-upcall.h"
|
|
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
|
|
2013-10-22 16:16:31 -07:00
|
|
|
|
#include "connmgr.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "coverage.h"
|
2014-05-05 15:44:40 +12:00
|
|
|
|
#include "cmap.h"
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
#include "lib/dpif-provider.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "dpif.h"
|
2016-03-03 10:20:46 -08:00
|
|
|
|
#include "openvswitch/dynamic-string.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "fail-open.h"
|
2013-09-12 17:42:23 -07:00
|
|
|
|
#include "guarded-list.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "latch.h"
|
2016-03-25 14:10:21 -07:00
|
|
|
|
#include "openvswitch/list.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "netlink.h"
|
2016-03-25 14:10:24 -07:00
|
|
|
|
#include "openvswitch/ofpbuf.h"
|
2013-09-24 15:04:04 -07:00
|
|
|
|
#include "ofproto-dpif-ipfix.h"
|
|
|
|
|
#include "ofproto-dpif-sflow.h"
|
2013-09-24 13:39:56 -07:00
|
|
|
|
#include "ofproto-dpif-xlate.h"
|
2016-09-14 16:51:27 -07:00
|
|
|
|
#include "ofproto-dpif-xlate-cache.h"
|
2018-02-28 16:32:27 -08:00
|
|
|
|
#include "ofproto-dpif-trace.h"
|
2014-03-18 16:34:28 -07:00
|
|
|
|
#include "ovs-rcu.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "packets.h"
|
2017-11-03 13:53:53 +08:00
|
|
|
|
#include "openvswitch/poll-loop.h"
|
2013-11-20 18:06:12 -08:00
|
|
|
|
#include "seq.h"
|
2017-10-03 17:31:34 -07:00
|
|
|
|
#include "tunnel.h"
|
2013-11-20 18:06:12 -08:00
|
|
|
|
#include "unixctl.h"
|
2023-01-23 12:03:29 +01:00
|
|
|
|
#include "openvswitch/usdt-probes.h"
|
2014-12-15 14:10:38 +01:00
|
|
|
|
#include "openvswitch/vlog.h"
|
2018-10-18 21:43:13 +05:30
|
|
|
|
#include "lib/netdev-provider.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-07-26 06:51:55 +00:00
|
|
|
|
/* Maximum number of upcalls read and processed in one handler batch. */
#define UPCALL_MAX_BATCH 64
/* Maximum number of datapath flows revalidated in one batch. */
#define REVALIDATE_MAX_BATCH 50
/* Three quarters of the uint64_t range (UINT64_MAX / 4 * 3). */
#define UINT64_THREE_QUARTERS (UINT64_MAX / 4 * 3)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
|
|
|
|
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);

/* Coverage counters incremented at various points in upcall handling and
 * flow revalidation; visible via "ovs-appctl coverage/show". */
COVERAGE_DEFINE(dumped_duplicate_flow);
COVERAGE_DEFINE(dumped_inconsistent_flow);
COVERAGE_DEFINE(dumped_new_flow);
COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(revalidate_missed_dp_flow);
COVERAGE_DEFINE(revalidate_missing_dp_flow);
COVERAGE_DEFINE(ukey_dp_change);
COVERAGE_DEFINE(ukey_invalid_stat_reset);
COVERAGE_DEFINE(ukey_replace_contention);
COVERAGE_DEFINE(upcall_flow_limit_grew);
COVERAGE_DEFINE(upcall_flow_limit_hit);
COVERAGE_DEFINE(upcall_flow_limit_kill);
COVERAGE_DEFINE(upcall_flow_limit_reduced);
COVERAGE_DEFINE(upcall_flow_limit_scaled);
COVERAGE_DEFINE(upcall_ukey_contention);
COVERAGE_DEFINE(upcall_ukey_replace);
|
revalidator: Prevent handling the same flow twice.
When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.
This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.
The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."
This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.
We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-23 15:31:17 +12:00
|
|
|
|
|
2014-02-26 23:03:24 -08:00
|
|
|
|
/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};
|
|
|
|
|
|
2014-06-05 17:28:46 +12:00
|
|
|
|
/* In the absence of a multiple-writer multiple-reader datastructure for
|
2016-01-07 16:16:25 -08:00
|
|
|
|
* storing udpif_keys ("ukeys"), we use a large number of cmaps, each with its
|
|
|
|
|
* own lock for writing. */
|
2014-06-05 17:28:46 +12:00
|
|
|
|
#define N_UMAPS 512 /* per udpif. */
|
|
|
|
|
struct umap {
|
|
|
|
|
struct ovs_mutex mutex; /* Take for writing to the following. */
|
|
|
|
|
struct cmap cmap; /* Datapath flow keys. */
|
|
|
|
|
};
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep".  In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 * During the dump phase, revalidators fetch flows from the datapath and
 * attribute the statistics to OpenFlow rules.  Each datapath flow has a
 * corresponding ukey which caches the most recently seen statistics.  If
 * a flow needs to be deleted (for example, because it is unused over a
 * period of time), revalidator threads may delete the flow during the
 * dump phase.  The datapath is not guaranteed to reliably dump all flows
 * from the datapath, and there is no mapping between datapath flows to
 * revalidators, so a particular flow may be handled by zero or more
 * revalidators during a single dump phase.  To avoid duplicate attribution
 * of statistics, ukeys are never deleted during this phase.
 *
 * During the sweep phase, each revalidator takes ownership of a different
 * slice of umaps and sweeps through all ukeys in those umaps to figure out
 * whether they need to be deleted.  During this phase, revalidators may
 * fetch individual flows which were not dumped during the dump phase to
 * validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
/* An upcall handler for ofproto_dpif.
|
|
|
|
|
*
|
2014-02-26 23:03:24 -08:00
|
|
|
|
* udpif keeps records of two kind of logically separate units:
|
|
|
|
|
*
|
|
|
|
|
* upcall handling
|
|
|
|
|
* ---------------
|
|
|
|
|
*
|
|
|
|
|
* - An array of 'struct handler's for upcall handling and flow
|
|
|
|
|
* installation.
|
2013-09-24 13:39:56 -07:00
|
|
|
|
*
|
2014-02-26 23:03:24 -08:00
|
|
|
|
* flow revalidation
|
|
|
|
|
* -----------------
|
|
|
|
|
*
|
2014-04-10 07:14:08 +00:00
|
|
|
|
* - Revalidation threads which read the datapath flow table and maintains
|
|
|
|
|
* them.
|
|
|
|
|
*/
|
2013-06-25 14:45:43 -07:00
|
|
|
|
struct udpif {
|
2014-12-15 14:10:38 +01:00
|
|
|
|
struct ovs_list list_node; /* In all_udpifs list. */
|
2013-11-20 18:06:12 -08:00
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
struct dpif *dpif; /* Datapath handle. */
|
|
|
|
|
struct dpif_backer *backer; /* Opaque dpif_backer pointer. */
|
|
|
|
|
|
2013-09-24 15:04:04 -07:00
|
|
|
|
struct handler *handlers; /* Upcall handlers. */
|
2021-07-16 06:17:34 -04:00
|
|
|
|
uint32_t n_handlers;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
struct revalidator *revalidators; /* Flow revalidators. */
|
2021-07-16 06:17:34 -04:00
|
|
|
|
uint32_t n_revalidators;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
struct latch exit_latch; /* Tells child threads to exit. */
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* Revalidation. */
|
|
|
|
|
struct seq *reval_seq; /* Incremented to force revalidation. */
|
|
|
|
|
bool reval_exit; /* Set by leader on 'exit_latch. */
|
2014-05-29 15:37:37 -07:00
|
|
|
|
struct ovs_barrier reval_barrier; /* Barrier used by revalidators. */
|
2014-05-20 11:37:02 -07:00
|
|
|
|
struct dpif_flow_dump *dump; /* DPIF flow dump state. */
|
2013-09-24 13:39:56 -07:00
|
|
|
|
long long int dump_duration; /* Duration of the last flow dump. */
|
2014-04-10 07:14:08 +00:00
|
|
|
|
struct seq *dump_seq; /* Increments each dump iteration. */
|
2014-10-06 11:14:08 +13:00
|
|
|
|
atomic_bool enable_ufid; /* If true, skip dumping flow attrs. */
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
2015-08-29 06:09:45 +00:00
|
|
|
|
/* These variables provide a mechanism for the main thread to pause
|
|
|
|
|
* all revalidation without having to completely shut the threads down.
|
|
|
|
|
* 'pause_latch' is shared between the main thread and the lead
|
|
|
|
|
* revalidator thread, so when it is desirable to halt revalidation, the
|
|
|
|
|
* main thread will set the latch. 'pause' and 'pause_barrier' are shared
|
|
|
|
|
* by revalidator threads. The lead revalidator will set 'pause' when it
|
|
|
|
|
* observes the latch has been set, and this will cause all revalidator
|
|
|
|
|
* threads to wait on 'pause_barrier' at the beginning of the next
|
|
|
|
|
* revalidation round. */
|
|
|
|
|
bool pause; /* Set by leader on 'pause_latch. */
|
|
|
|
|
struct latch pause_latch; /* Set to force revalidators pause. */
|
|
|
|
|
struct ovs_barrier pause_barrier; /* Barrier used to pause all */
|
|
|
|
|
/* revalidators by main thread. */
|
|
|
|
|
|
2014-06-05 17:28:46 +12:00
|
|
|
|
/* There are 'N_UMAPS' maps containing 'struct udpif_key' elements.
|
2014-04-10 07:14:08 +00:00
|
|
|
|
*
|
|
|
|
|
* During the flow dump phase, revalidators insert into these with a random
|
|
|
|
|
* distribution. During the garbage collection phase, each revalidator
|
2014-06-05 17:28:46 +12:00
|
|
|
|
* takes care of garbage collecting a slice of these maps. */
|
|
|
|
|
struct umap *ukeys;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Datapath flow statistics. */
|
|
|
|
|
unsigned int max_n_flows;
|
|
|
|
|
unsigned int avg_n_flows;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Following fields are accessed and modified by different threads. */
|
|
|
|
|
atomic_uint flow_limit; /* Datapath flow hard limit. */
|
2014-01-22 06:50:49 +00:00
|
|
|
|
|
|
|
|
|
/* n_flows_mutex prevents multiple threads updating these concurrently. */
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_uint n_flows; /* Number of flows in the datapath. */
|
2014-01-22 06:50:49 +00:00
|
|
|
|
atomic_llong n_flows_timestamp; /* Last time n_flows was updated. */
|
|
|
|
|
struct ovs_mutex n_flows_mutex;
|
2014-06-25 14:02:45 +00:00
|
|
|
|
|
|
|
|
|
/* Following fields are accessed and modified only from the main thread. */
|
|
|
|
|
struct unixctl_conn **conns; /* Connections waiting on dump_seq. */
|
|
|
|
|
uint64_t conn_seq; /* Corresponds to 'dump_seq' when
|
|
|
|
|
conns[n_conns-1] was stored. */
|
|
|
|
|
size_t n_conns; /* Number of connections waiting. */
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
|
|
|
|
|
long long int offload_rebalance_time; /* Time of last offload rebalance */
|
2013-06-25 14:45:43 -07:00
|
|
|
|
};
|
|
|
|
|
|
2013-09-24 15:04:04 -07:00
|
|
|
|
/* Classification of an upcall received from the datapath.  The order of
 * enumerators is significant; do not reorder. */
enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss. */
    SLOW_PATH_UPCALL,           /* Slow path upcall. */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL,               /* Per-bridge sampling. */
    CONTROLLER_UPCALL           /* Destined for the controller. */
};
|
|
|
|
|
|
2015-08-03 18:43:53 -07:00
|
|
|
|
/* Possible outcomes of revalidating a single ukey / datapath flow. */
enum reval_result {
    UKEY_KEEP,                  /* Keep the flow as-is. */
    UKEY_DELETE,                /* Remove the flow from the datapath. */
    UKEY_MODIFY                 /* Update the flow's actions. */
};
|
|
|
|
|
|
2013-09-24 15:04:04 -07:00
|
|
|
|
struct upcall {
|
2014-08-06 18:49:44 -07:00
|
|
|
|
struct ofproto_dpif *ofproto; /* Parent ofproto. */
|
2015-03-26 11:18:16 -07:00
|
|
|
|
const struct recirc_id_node *recirc; /* Recirculation context. */
|
|
|
|
|
bool have_recirc_ref; /* Reference held on recirc ctx? */
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
/* The flow and packet are only required to be constant when using
|
|
|
|
|
* dpif-netdev. If a modification is absolutely necessary, a const cast
|
|
|
|
|
* may be used with other datapaths. */
|
|
|
|
|
const struct flow *flow; /* Parsed representation of the packet. */
|
ofproto-dpif-upcall: Slow path flows that datapath can't fully match.
In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:
- The flow lacks one or more fields that userspace expects to be there,
that is, the datapath doesn't understand or parse them but userspace
does. This is, for example, what would happen if current OVS
userspace, which understands and extracts TCP flags, were to be
paired with an older OVS kernel module, which does not. Internally
OVS uses the name ODP_FIT_TOO_LITTLE for this situation.
- The flow includes fields that userspace does not know about, that is,
the datapath understands and parses them but userspace does not.
This is, for example, what would happen if an old OVS userspace that
does not understand or extract TCP flags, were to be paired with a
recent OVS kernel module that does. Internally, OVS uses the name
ODP_FIT_TOO_MUCH for this situation.
The latter is not a big deal and OVS doesn't have to do much to cope with
it.
The former is more of a problem. When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path. Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.
Somehow, this nuance was lost a long time. From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE. Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it. I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix. This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.
CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>
2018-01-24 11:40:19 -08:00
|
|
|
|
enum odp_key_fitness fitness; /* Fitness of 'flow' relative to ODP key. */
|
2014-09-24 15:24:39 +12:00
|
|
|
|
const ovs_u128 *ufid; /* Unique identifier for 'flow'. */
|
2015-05-22 17:14:19 +01:00
|
|
|
|
unsigned pmd_id; /* Datapath poll mode driver id. */
|
2015-02-22 03:21:09 -08:00
|
|
|
|
const struct dp_packet *packet; /* Packet associated with this upcall. */
|
2017-10-03 17:31:34 -07:00
|
|
|
|
ofp_port_t ofp_in_port; /* OpenFlow in port, or OFPP_NONE. */
|
2015-02-26 15:52:34 -08:00
|
|
|
|
uint16_t mru; /* If !0, Maximum receive unit of
|
|
|
|
|
fragmented IP packet */
|
ofproto-dpif-upcall: Echo HASH attribute back to datapath.
The kernel datapath may sent upcall with hash info,
ovs-vswitchd should get it from upcall and then send
it back.
The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-11-15 10:58:59 +08:00
|
|
|
|
uint64_t hash;
|
dpif-netlink: Provide original upcall pid in 'execute' commands.
When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall. With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet. This allows us to send
packets from the same traffic flow through the same handler.
The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.
While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again. And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.
However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread. So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.
The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port. In real setups the
number of recirculations can go up to 4 or 5, sometimes more.
There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet. So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.
For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2. Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets. Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.
Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID. This way the upcall triggered during the
execution will go to the same handler. Packets will be enqueued to
the same socket and re-injected in the same order. This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows to eliminate
races between multiple handlers.
The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.
Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-07-08 13:34:02 +02:00
|
|
|
|
uint32_t pid; /* Socket PID this upcall was received from,
|
|
|
|
|
* or zero. */
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
enum upcall_type type; /* Type of the upcall. */
|
2015-07-17 21:37:02 -07:00
|
|
|
|
const struct nlattr *actions; /* Flow actions in DPIF_UC_ACTION Upcalls. */
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
|
|
|
|
bool xout_initialized; /* True if 'xout' must be uninitialized. */
|
|
|
|
|
struct xlate_out xout; /* Result of xlate_actions(). */
|
2015-07-31 13:34:16 -07:00
|
|
|
|
struct ofpbuf odp_actions; /* Datapath actions from xlate_actions(). */
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards wc; /* Dependencies that megaflow must match. */
|
2015-07-22 16:38:18 -07:00
|
|
|
|
struct ofpbuf put_actions; /* Actions 'put' in the fastpath. */
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2014-08-22 09:01:36 -07:00
|
|
|
|
struct dpif_ipfix *ipfix; /* IPFIX pointer or NULL. */
|
|
|
|
|
struct dpif_sflow *sflow; /* SFlow pointer or NULL. */
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
struct udpif_key *ukey; /* Revalidator flow cache. */
|
|
|
|
|
bool ukey_persists; /* Set true to keep 'ukey' beyond the
|
|
|
|
|
lifetime of this upcall. */
|
|
|
|
|
|
|
|
|
|
uint64_t reval_seq; /* udpif->reval_seq at translation time. */
|
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
/* Not used by the upcall callback interface. */
|
|
|
|
|
const struct nlattr *key; /* Datapath flow key. */
|
|
|
|
|
size_t key_len; /* Datapath flow key length. */
|
2014-08-17 20:19:36 -07:00
|
|
|
|
const struct nlattr *out_tun_key; /* Datapath output tunnel key. */
|
2015-07-31 13:34:16 -07:00
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
struct user_action_cookie cookie;
|
|
|
|
|
|
2015-07-31 13:34:16 -07:00
|
|
|
|
uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
|
2013-09-24 15:04:04 -07:00
|
|
|
|
};
|
|
|
|
|
|
2016-08-31 11:06:04 -07:00
|
|
|
|
/* Ukeys must transition through these states using transition_ukey(). */
enum ukey_state {
    UKEY_CREATED = 0,
    UKEY_VISIBLE,       /* Ukey is in umap, datapath flow install is queued. */
    UKEY_OPERATIONAL,   /* Ukey is in umap, datapath flow is installed. */
    UKEY_INCONSISTENT,  /* Ukey is in umap, datapath flow is inconsistent. */
    UKEY_EVICTING,      /* Ukey is in umap, datapath flow delete is queued. */
    UKEY_EVICTED,       /* Ukey is in umap, datapath flow is deleted. */
    UKEY_DELETED,       /* Ukey removed from umap, ukey free is deferred. */
};
/* Total number of states in the ukey lifecycle. */
#define N_UKEY_STATES (UKEY_DELETED + 1)
|
|
|
|
|
|
2024-05-14 15:15:34 +02:00
|
|
|
|
/* Ukey delete reasons used by USDT probes.  Please keep in sync with the
 * definition in utilities/usdt-scripts/flow_reval_monitor.py.
 * The order (and hence the numeric values) must not change, as the values
 * are consumed externally by the USDT script. */
enum flow_del_reason {
    FDR_NONE = 0,            /* No delete reason specified. */
    FDR_AVOID_CACHING,       /* Cache avoidance flag set. */
    FDR_BAD_ODP_FIT,         /* Bad ODP flow fit. */
    FDR_FLOW_IDLE,           /* Flow idle timeout. */
    FDR_FLOW_LIMIT,          /* Kill all flows condition reached. */
    FDR_FLOW_WILDCARDED,     /* Flow needs a narrower wildcard mask. */
    FDR_NO_OFPROTO,          /* Bridge not found. */
    FDR_PURGE,               /* User requested flow deletion. */
    FDR_TOO_EXPENSIVE,       /* Too expensive to revalidate. */
    FDR_UPDATE_FAIL,         /* Datapath update failed. */
    FDR_XLATION_ERROR,       /* Flow translation error. */
    FDR_FLOW_MISSING_DP,     /* Flow is missing from the datapath. */
};
|
|
/* 'udpif_key's are responsible for tracking the little bit of state udpif
 * needs to do flow expiration which can't be pulled directly from the
 * datapath.  They may be created by any handler or revalidator thread at any
 * time, and read by any revalidator during the dump phase.  They are however
 * each owned by a single revalidator which takes care of destroying them
 * during the garbage-collection phase.
 *
 * The mutex within the ukey protects some members of the ukey.  The ukey
 * itself is protected by RCU and is held within a umap in the parent udpif.
 * Adding or removing a ukey from a umap is only safe when holding the
 * corresponding umap lock. */
struct udpif_key {
    struct cmap_node cmap_node;     /* In parent revalidator 'ukeys' map. */

    /* These elements are read only once created, and therefore aren't
     * protected by a mutex. */
    const struct nlattr *key;      /* Datapath flow key. */
    size_t key_len;                /* Length of 'key'. */
    const struct nlattr *mask;     /* Datapath flow mask. */
    size_t mask_len;               /* Length of 'mask'. */
    ovs_u128 ufid;                 /* Unique flow identifier. */
    bool ufid_present;             /* True if 'ufid' is in datapath. */
    uint32_t hash;                 /* Pre-computed hash for 'key'. */
    unsigned pmd_id;               /* Datapath poll mode driver id. */

    struct ovs_mutex mutex;                   /* Guards the following. */
    struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
    const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
    long long int created OVS_GUARDED;        /* Estimate of creation time. */
    uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
    uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
    enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
    uint32_t missed_dumps OVS_GUARDED;        /* Missed consecutive dumps. */

    /* 'state' debug information. */
    unsigned int state_thread OVS_GUARDED;    /* Thread that transitions. */
    const char *state_where OVS_GUARDED;      /* transition_ukey() locator. */

    /* Datapath flow actions as nlattrs.  Protected by RCU.  Read with
     * ukey_get_actions(), and write with ukey_set_actions(). */
    OVSRCU_TYPE(struct ofpbuf *) actions;

    struct xlate_cache *xcache OVS_GUARDED;   /* Cache for xlate entries that
                                               * are affected by this ukey.
                                               * Used for stats and learning.*/
    /* Scratch buffers backing 'key' and 'mask' when the ukey is created
     * locally rather than from a datapath dump. */
    union {
        struct odputil_keybuf buf;
        struct nlattr nla;
    } keybuf, maskbuf;

    uint32_t key_recirc_id;   /* Non-zero if reference is held by the ukey. */
    struct recirc_refs recircs;  /* Action recirc IDs with references held. */

#define OFFL_REBAL_INTVL_MSEC   3000    /* dynamic offload rebalance freq */
    struct netdev *in_netdev;           /* in_odp_port's netdev */
    bool offloaded;                     /* True if flow is offloaded */
    uint64_t flow_pps_rate;             /* Packets-Per-Second rate */
    long long int flow_time;            /* last pps update time */
    uint64_t flow_packets;              /* #pkts seen in interval */
    uint64_t flow_backlog_packets;      /* prev-mode #pkts (offl or kernel) */
};
|
|
/* Datapath operation with optional ukey attached. */
struct ukey_op {
    struct udpif_key *ukey;         /* May be NULL for ops without a ukey. */
    struct dpif_flow_stats stats;   /* Stats for 'op'. */
    struct dpif_op dop;             /* Flow operation. */
};
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
2014-12-15 14:10:38 +01:00
|
|
|
|
static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
static size_t recv_upcalls(struct handler *);
|
|
|
|
|
static int process_upcall(struct udpif *, struct upcall *,
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct ofpbuf *odp_actions, struct flow_wildcards *);
|
2014-07-26 06:51:55 +00:00
|
|
|
|
static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
|
2020-01-09 12:49:44 -08:00
|
|
|
|
static void udpif_stop_threads(struct udpif *, bool delete_flows);
|
2021-07-16 06:17:34 -04:00
|
|
|
|
static void udpif_start_threads(struct udpif *, uint32_t n_handlers,
|
|
|
|
|
uint32_t n_revalidators);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
static void udpif_pause_revalidators(struct udpif *);
|
|
|
|
|
static void udpif_resume_revalidators(struct udpif *);
|
2013-09-24 15:04:04 -07:00
|
|
|
|
static void *udpif_upcall_handler(void *);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void *udpif_revalidator(void *);
|
2014-05-14 16:19:34 +09:00
|
|
|
|
static unsigned long udpif_get_n_flows(struct udpif *);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
static void revalidate(struct revalidator *);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
static void revalidator_pause(struct revalidator *);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void revalidator_sweep(struct revalidator *);
|
2014-02-11 13:55:36 -08:00
|
|
|
|
static void revalidator_purge(struct revalidator *);
|
2013-11-20 18:06:12 -08:00
|
|
|
|
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
|
|
|
|
static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
static void upcall_unixctl_disable_ufid(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
|
|
|
|
static void upcall_unixctl_enable_ufid(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
|
2014-02-06 09:49:19 -08:00
|
|
|
|
static void upcall_unixctl_set_flow_limit(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-06-25 14:02:45 +00:00
|
|
|
|
static void upcall_unixctl_dump_wait(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-11-13 10:42:47 -08:00
|
|
|
|
static void upcall_unixctl_purge(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2022-09-13 21:08:51 +02:00
|
|
|
|
static void upcall_unixctl_pause(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
|
|
|
|
static void upcall_unixctl_resume(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
static void upcall_unixctl_ofproto_detrace(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
static struct udpif_key *ukey_create_from_upcall(struct upcall *,
|
|
|
|
|
struct flow_wildcards *);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
static int ukey_create_from_dpif_flow(const struct udpif *,
|
|
|
|
|
const struct dpif_flow *,
|
|
|
|
|
struct udpif_key **);
|
2015-08-12 14:50:54 -07:00
|
|
|
|
static void ukey_get_actions(struct udpif_key *, const struct nlattr **actions,
|
|
|
|
|
size_t *size);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
static bool ukey_install__(struct udpif *, struct udpif_key *ukey)
|
|
|
|
|
OVS_TRY_LOCK(true, ukey->mutex);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey);
|
2017-04-26 18:03:12 -07:00
|
|
|
|
static void transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
|
|
|
|
|
const char *where)
|
2016-08-31 11:06:04 -07:00
|
|
|
|
OVS_REQUIRES(ukey->mutex);
|
2017-04-26 18:03:12 -07:00
|
|
|
|
#define transition_ukey(UKEY, DST) \
|
|
|
|
|
transition_ukey_at(UKEY, DST, OVS_SOURCE_LOCATOR)
|
2014-09-24 15:24:39 +12:00
|
|
|
|
static struct udpif_key *ukey_lookup(struct udpif *udpif,
|
2016-02-03 14:31:43 +03:00
|
|
|
|
const ovs_u128 *ufid,
|
|
|
|
|
const unsigned pmd_id);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
static int ukey_acquire(struct udpif *, const struct dpif_flow *,
|
2014-10-06 11:14:08 +13:00
|
|
|
|
struct udpif_key **result, int *error);
|
2014-05-05 15:44:40 +12:00
|
|
|
|
static void ukey_delete__(struct udpif_key *);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
static void ukey_delete(struct umap *, struct udpif_key *);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
static enum upcall_type classify_upcall(enum dpif_upcall_type type,
|
2017-09-29 17:44:08 -07:00
|
|
|
|
const struct nlattr *userdata,
|
|
|
|
|
struct user_action_cookie *cookie);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2016-08-31 11:06:02 -07:00
|
|
|
|
static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_flow_put_flags flags);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
|
|
|
|
|
struct udpif_key *ukey);
|
2016-08-31 11:06:02 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
static int upcall_receive(struct upcall *, const struct dpif_backer *,
|
2015-02-22 03:21:09 -08:00
|
|
|
|
const struct dp_packet *packet, enum dpif_upcall_type,
|
2014-09-24 15:24:39 +12:00
|
|
|
|
const struct nlattr *userdata, const struct flow *,
|
2015-02-26 15:52:34 -08:00
|
|
|
|
const unsigned int mru,
|
2022-09-01 17:42:49 +02:00
|
|
|
|
const ovs_u128 *ufid, const unsigned pmd_id,
|
|
|
|
|
char **errorp);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
static void upcall_uninit(struct upcall *);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
static void udpif_flow_rebalance(struct udpif *udpif);
|
|
|
|
|
static int udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_offload_type offload_type);
|
|
|
|
|
static int udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_offload_type offload_type);
|
|
|
|
|
|
2014-07-26 15:39:58 -07:00
|
|
|
|
static upcall_callback upcall_cb;
|
2015-08-25 16:36:46 -07:00
|
|
|
|
static dp_purge_callback dp_purge_cb;
|
2014-07-26 15:39:58 -07:00
|
|
|
|
|
2023-02-28 18:30:56 -08:00
|
|
|
|
static atomic_bool enable_megaflows = true;
|
|
|
|
|
static atomic_bool enable_ufid = true;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2015-07-15 23:41:05 +08:00
|
|
|
|
void
|
|
|
|
|
udpif_init(void)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
{
|
2013-11-20 18:06:12 -08:00
|
|
|
|
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
|
|
|
|
|
if (ovsthread_once_start(&once)) {
|
|
|
|
|
unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show,
|
|
|
|
|
NULL);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unixctl_command_register("upcall/disable-megaflows", "", 0, 0,
|
|
|
|
|
upcall_unixctl_disable_megaflows, NULL);
|
|
|
|
|
unixctl_command_register("upcall/enable-megaflows", "", 0, 0,
|
|
|
|
|
upcall_unixctl_enable_megaflows, NULL);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
unixctl_command_register("upcall/disable-ufid", "", 0, 0,
|
|
|
|
|
upcall_unixctl_disable_ufid, NULL);
|
|
|
|
|
unixctl_command_register("upcall/enable-ufid", "", 0, 0,
|
|
|
|
|
upcall_unixctl_enable_ufid, NULL);
|
2016-12-06 01:01:22 -08:00
|
|
|
|
unixctl_command_register("upcall/set-flow-limit", "flow-limit-number",
|
|
|
|
|
1, 1, upcall_unixctl_set_flow_limit, NULL);
|
2014-06-25 14:02:45 +00:00
|
|
|
|
unixctl_command_register("revalidator/wait", "", 0, 0,
|
|
|
|
|
upcall_unixctl_dump_wait, NULL);
|
2014-11-13 10:42:47 -08:00
|
|
|
|
unixctl_command_register("revalidator/purge", "", 0, 0,
|
|
|
|
|
upcall_unixctl_purge, NULL);
|
2022-09-13 21:08:51 +02:00
|
|
|
|
unixctl_command_register("revalidator/pause", NULL, 0, 0,
|
|
|
|
|
upcall_unixctl_pause, NULL);
|
|
|
|
|
unixctl_command_register("revalidator/resume", NULL, 0, 0,
|
|
|
|
|
upcall_unixctl_resume, NULL);
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
unixctl_command_register("ofproto/detrace", "UFID [pmd=PMD-ID]", 1, 2,
|
|
|
|
|
upcall_unixctl_ofproto_detrace, NULL);
|
2013-11-20 18:06:12 -08:00
|
|
|
|
ovsthread_once_done(&once);
|
|
|
|
|
}
|
2015-07-15 23:41:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct udpif *
|
|
|
|
|
udpif_create(struct dpif_backer *backer, struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif = xzalloc(sizeof *udpif);
|
2013-11-20 18:06:12 -08:00
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
udpif->dpif = dpif;
|
|
|
|
|
udpif->backer = backer;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
|
2013-09-17 14:35:53 -07:00
|
|
|
|
udpif->reval_seq = seq_create();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
udpif->dump_seq = seq_create();
|
2013-06-25 14:45:43 -07:00
|
|
|
|
latch_init(&udpif->exit_latch);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
latch_init(&udpif->pause_latch);
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_push_back(&all_udpifs, &udpif->list_node);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
atomic_init(&udpif->enable_ufid, false);
|
2014-01-22 06:50:49 +00:00
|
|
|
|
atomic_init(&udpif->n_flows, 0);
|
|
|
|
|
atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
|
|
|
|
|
ovs_mutex_init(&udpif->n_flows_mutex);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
udpif->ukeys = xmalloc(N_UMAPS * sizeof *udpif->ukeys);
|
|
|
|
|
for (int i = 0; i < N_UMAPS; i++) {
|
|
|
|
|
cmap_init(&udpif->ukeys[i].cmap);
|
|
|
|
|
ovs_mutex_init(&udpif->ukeys[i].mutex);
|
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-07-26 15:39:58 -07:00
|
|
|
|
dpif_register_upcall_cb(dpif, upcall_cb, udpif);
|
2015-08-25 16:36:46 -07:00
|
|
|
|
dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);
|
2014-07-26 06:51:55 +00:00
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return udpif;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-25 14:02:45 +00:00
|
|
|
|
void
|
|
|
|
|
udpif_run(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
if (udpif->conns && udpif->conn_seq != seq_read(udpif->dump_seq)) {
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < udpif->n_conns; i++) {
|
|
|
|
|
unixctl_command_reply(udpif->conns[i], NULL);
|
|
|
|
|
}
|
|
|
|
|
free(udpif->conns);
|
|
|
|
|
udpif->conns = NULL;
|
|
|
|
|
udpif->n_conns = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
/* Stops all threads, unhooks the dpif callbacks, frees every remaining ukey,
 * and releases all resources owned by 'udpif', including 'udpif' itself. */
void
udpif_destroy(struct udpif *udpif)
{
    udpif_stop_threads(udpif, false);

    dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif);
    dpif_register_upcall_cb(udpif->dpif, NULL, udpif);

    for (int i = 0; i < N_UMAPS; i++) {
        struct udpif_key *ukey;

        CMAP_FOR_EACH (ukey, cmap_node, &udpif->ukeys[i].cmap) {
            ukey_delete__(ukey);
        }
        cmap_destroy(&udpif->ukeys[i].cmap);
        ovs_mutex_destroy(&udpif->ukeys[i].mutex);
    }
    free(udpif->ukeys);
    udpif->ukeys = NULL;

    ovs_list_remove(&udpif->list_node);
    latch_destroy(&udpif->exit_latch);
    latch_destroy(&udpif->pause_latch);
    seq_destroy(udpif->reval_seq);
    seq_destroy(udpif->dump_seq);
    ovs_mutex_destroy(&udpif->n_flows_mutex);
    free(udpif);
}
|
|
|
2020-01-09 12:49:44 -08:00
|
|
|
|
/* Stops the handler and revalidator threads.
|
|
|
|
|
*
|
|
|
|
|
* If 'delete_flows' is true, we delete ukeys and delete all flows from the
|
|
|
|
|
* datapath. Otherwise, we end up double-counting stats for flows that remain
|
|
|
|
|
* in the datapath. If 'delete_flows' is false, we skip this step. This is
|
|
|
|
|
* appropriate if OVS is about to exit anyway and it is desirable to let
|
|
|
|
|
* existing network connections continue being forwarded afterward. */
|
2014-04-21 17:31:11 -07:00
|
|
|
|
static void
|
2020-01-09 12:49:44 -08:00
|
|
|
|
udpif_stop_threads(struct udpif *udpif, bool delete_flows)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
{
|
2014-04-21 20:05:08 -07:00
|
|
|
|
if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
size_t i;
|
|
|
|
|
|
2018-11-02 11:25:45 -07:00
|
|
|
|
/* Tell the threads to exit. */
|
2013-06-25 14:45:43 -07:00
|
|
|
|
latch_set(&udpif->exit_latch);
|
|
|
|
|
|
2018-11-02 11:25:45 -07:00
|
|
|
|
/* Wait for the threads to exit. Quiesce because this can take a long
|
|
|
|
|
* time.. */
|
|
|
|
|
ovsrcu_quiesce_start();
|
2013-06-25 14:45:43 -07:00
|
|
|
|
for (i = 0; i < udpif->n_handlers; i++) {
|
2018-11-02 11:25:45 -07:00
|
|
|
|
xpthread_join(udpif->handlers[i].thread, NULL);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
for (i = 0; i < udpif->n_revalidators; i++) {
|
2014-04-10 07:14:08 +00:00
|
|
|
|
xpthread_join(udpif->revalidators[i].thread, NULL);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-07-26 06:51:55 +00:00
|
|
|
|
dpif_disable_upcall(udpif->dpif);
|
2018-11-02 11:25:45 -07:00
|
|
|
|
ovsrcu_quiesce_end();
|
2014-07-26 06:51:55 +00:00
|
|
|
|
|
2020-01-09 12:49:44 -08:00
|
|
|
|
if (delete_flows) {
|
|
|
|
|
for (i = 0; i < udpif->n_revalidators; i++) {
|
|
|
|
|
revalidator_purge(&udpif->revalidators[i]);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
latch_poll(&udpif->exit_latch);
|
|
|
|
|
|
2014-05-29 15:37:37 -07:00
|
|
|
|
ovs_barrier_destroy(&udpif->reval_barrier);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
ovs_barrier_destroy(&udpif->pause_barrier);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
free(udpif->revalidators);
|
|
|
|
|
udpif->revalidators = NULL;
|
|
|
|
|
udpif->n_revalidators = 0;
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
free(udpif->handlers);
|
|
|
|
|
udpif->handlers = NULL;
|
|
|
|
|
udpif->n_handlers = 0;
|
|
|
|
|
}
|
2014-04-21 17:31:11 -07:00
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2018-11-02 11:25:45 -07:00
|
|
|
|
/* Starts the handler and revalidator threads. */
|
2014-04-21 17:31:11 -07:00
|
|
|
|
static void
|
2021-07-16 06:17:34 -04:00
|
|
|
|
udpif_start_threads(struct udpif *udpif, uint32_t n_handlers_,
|
|
|
|
|
uint32_t n_revalidators_)
|
2014-04-21 17:31:11 -07:00
|
|
|
|
{
|
2024-02-20 10:31:34 +01:00
|
|
|
|
if (udpif && n_revalidators_) {
|
2018-11-02 11:25:45 -07:00
|
|
|
|
/* Creating a thread can take a significant amount of time on some
|
|
|
|
|
* systems, even hundred of milliseconds, so quiesce around it. */
|
|
|
|
|
ovsrcu_quiesce_start();
|
|
|
|
|
|
2018-02-27 17:34:14 -08:00
|
|
|
|
udpif->n_handlers = n_handlers_;
|
|
|
|
|
udpif->n_revalidators = n_revalidators_;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2024-02-20 10:31:34 +01:00
|
|
|
|
if (udpif->n_handlers) {
|
|
|
|
|
udpif->handlers = xzalloc(udpif->n_handlers
|
|
|
|
|
* sizeof *udpif->handlers);
|
|
|
|
|
for (size_t i = 0; i < udpif->n_handlers; i++) {
|
|
|
|
|
struct handler *handler = &udpif->handlers[i];
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2024-02-20 10:31:34 +01:00
|
|
|
|
handler->udpif = udpif;
|
|
|
|
|
handler->handler_id = i;
|
|
|
|
|
handler->thread = ovs_thread_create(
|
|
|
|
|
"handler", udpif_upcall_handler, handler);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
udpif->handlers = NULL;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-02-27 17:34:14 -08:00
|
|
|
|
atomic_init(&udpif->enable_ufid, udpif->backer->rt_support.ufid);
|
2014-07-26 06:51:55 +00:00
|
|
|
|
dpif_enable_upcall(udpif->dpif);
|
|
|
|
|
|
2014-05-29 15:37:37 -07:00
|
|
|
|
ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
udpif->reval_exit = false;
|
2015-08-29 06:09:45 +00:00
|
|
|
|
udpif->pause = false;
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
udpif->offload_rebalance_time = time_msec();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
udpif->revalidators = xzalloc(udpif->n_revalidators
|
|
|
|
|
* sizeof *udpif->revalidators);
|
2018-02-27 17:34:14 -08:00
|
|
|
|
for (size_t i = 0; i < udpif->n_revalidators; i++) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
struct revalidator *revalidator = &udpif->revalidators[i];
|
|
|
|
|
|
|
|
|
|
revalidator->udpif = udpif;
|
ovs-thread: Make caller provide thread name when creating a thread.
Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one. This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible. This also simplifies code, since two function calls become
only one.
This makes a few other changes to the thread creation function:
* Since it is no longer a direct wrapper around a pthread function,
rename it to avoid giving that impression.
* Remove 'pthread_attr_t *' param that every caller supplied as NULL.
* Change 'pthread *' parameter into a return value, for convenience.
The system-stats code hadn't set a thread name, so this fixes that issue.
This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself. Setting the thread
name earlier, as in this patch, avoids the problem.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-25 17:46:21 -07:00
|
|
|
|
revalidator->thread = ovs_thread_create(
|
|
|
|
|
"revalidator", udpif_revalidator, revalidator);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2018-11-02 11:25:45 -07:00
|
|
|
|
ovsrcu_quiesce_end();
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-04-21 17:31:11 -07:00
|
|
|
|
}
|
2014-03-18 16:34:28 -07:00
|
|
|
|
|
2015-08-29 06:09:45 +00:00
|
|
|
|
/* Pauses all revalidators. Should only be called by the main thread.
|
|
|
|
|
* When function returns, all revalidators are paused and will proceed
|
|
|
|
|
* only after udpif_resume_revalidators() is called. */
|
|
|
|
|
static void
|
|
|
|
|
udpif_pause_revalidators(struct udpif *udpif)
|
|
|
|
|
{
|
ofproto-dpif: Unhide structure contents.
Until now, ofproto-dpif.c has hidden the definitions of several structures,
such as struct ofproto_dpif and struct rule_dpif. This kind of information
hiding is often beneficial, because it forces code outside the file with
the definition to use the documented interfaces. However, in this case it
was starting to burden ofproto-dpif with an increasing number of trivial
helpers that were not improving or maintaining a useful abstraction and
that were making code harder to maintain and read.
Information hiding also made it hard to move blocks of code outside
ofproto-dpif.c itself, since any code moved out often needed new helpers if
it used anything that wasn't previously exposed. In the present instance,
upcoming patches will move code for tracing outside ofproto-dpif, and this
would require adding several helpers that would just obscure the function
of the code otherwise needlessly.
In balance, it seems that there is more harm than good in the information
hiding here, so this commit moves the definitions of several structures
from ofproto-dpif.c into ofproto-dpif.h. It also removes all of the
trivial helpers that had accumulated, instead changing their users to
directly access the members that they needed. It also reorganizes
ofproto-dpif.h, grouping structure definitions and function prototypes in a
sensible way.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Justin Pettit <jpettit@ovn.org>
2016-12-06 14:08:42 -08:00
|
|
|
|
if (udpif->backer->recv_set_enable) {
|
2015-09-25 11:42:40 -07:00
|
|
|
|
latch_set(&udpif->pause_latch);
|
|
|
|
|
ovs_barrier_block(&udpif->pause_barrier);
|
|
|
|
|
}
|
2015-08-29 06:09:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Resumes the pausing of revalidators. Should only be called by the
|
|
|
|
|
* main thread. */
|
|
|
|
|
static void
|
|
|
|
|
udpif_resume_revalidators(struct udpif *udpif)
|
|
|
|
|
{
|
ofproto-dpif: Unhide structure contents.
Until now, ofproto-dpif.c has hidden the definitions of several structures,
such as struct ofproto_dpif and struct rule_dpif. This kind of information
hiding is often beneficial, because it forces code outside the file with
the definition to use the documented interfaces. However, in this case it
was starting to burden ofproto-dpif with an increasing number of trivial
helpers that were not improving or maintaining a useful abstraction and
that were making code harder to maintain and read.
Information hiding also made it hard to move blocks of code outside
ofproto-dpif.c itself, since any code moved out often needed new helpers if
it used anything that wasn't previously exposed. In the present instance,
upcoming patches will move code for tracing outside ofproto-dpif, and this
would require adding several helpers that would just obscure the function
of the code otherwise needlessly.
In balance, it seems that there is more harm than good in the information
hiding here, so this commit moves the definitions of several structures
from ofproto-dpif.c into ofproto-dpif.h. It also removes all of the
trivial helpers that had accumulated, instead changing their users to
directly access the members that they needed. It also reorganizes
ofproto-dpif.h, grouping structure definitions and function prototypes in a
sensible way.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Justin Pettit <jpettit@ovn.org>
2016-12-06 14:08:42 -08:00
|
|
|
|
if (udpif->backer->recv_set_enable) {
|
2015-09-25 11:42:40 -07:00
|
|
|
|
latch_poll(&udpif->pause_latch);
|
|
|
|
|
ovs_barrier_block(&udpif->pause_barrier);
|
|
|
|
|
}
|
2015-08-29 06:09:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-04-21 17:31:11 -07:00
|
|
|
|
/* Tells 'udpif' how many threads it should use to handle upcalls.
|
2018-02-27 17:34:14 -08:00
|
|
|
|
* 'n_handlers_' and 'n_revalidators_' can never be zero. 'udpif''s
|
2014-04-21 17:31:11 -07:00
|
|
|
|
* datapath handle must have packet reception enabled before starting
|
|
|
|
|
* threads. */
|
|
|
|
|
void
|
2021-07-16 06:17:34 -04:00
|
|
|
|
udpif_set_threads(struct udpif *udpif, uint32_t n_handlers_,
|
|
|
|
|
uint32_t n_revalidators_)
|
2014-04-21 17:31:11 -07:00
|
|
|
|
{
|
2014-04-21 20:05:08 -07:00
|
|
|
|
ovs_assert(udpif);
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
uint32_t n_handlers_requested;
|
|
|
|
|
uint32_t n_revalidators_requested;
|
|
|
|
|
bool forced = false;
|
|
|
|
|
|
|
|
|
|
if (dpif_number_handlers_required(udpif->dpif, &n_handlers_requested)) {
|
|
|
|
|
forced = true;
|
|
|
|
|
if (!n_revalidators_) {
|
2024-02-20 10:31:34 +01:00
|
|
|
|
n_revalidators_requested = (n_handlers_requested
|
|
|
|
|
? n_handlers_requested
|
|
|
|
|
: MAX(count_cpu_cores(), 2)) / 4 + 1;
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
} else {
|
|
|
|
|
n_revalidators_requested = n_revalidators_;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
int threads = MAX(count_cpu_cores(), 2);
|
|
|
|
|
|
|
|
|
|
n_revalidators_requested = MAX(n_revalidators_, 0);
|
|
|
|
|
n_handlers_requested = MAX(n_handlers_, 0);
|
|
|
|
|
|
|
|
|
|
if (!n_revalidators_requested) {
|
|
|
|
|
n_revalidators_requested = n_handlers_requested
|
|
|
|
|
? MAX(threads - (int) n_handlers_requested, 1)
|
|
|
|
|
: threads / 4 + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!n_handlers_requested) {
|
|
|
|
|
n_handlers_requested = MAX(threads -
|
|
|
|
|
(int) n_revalidators_requested, 1);
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-04-21 17:31:11 -07:00
|
|
|
|
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
if (udpif->n_handlers != n_handlers_requested
|
|
|
|
|
|| udpif->n_revalidators != n_revalidators_requested) {
|
|
|
|
|
if (forced) {
|
|
|
|
|
VLOG_INFO("Overriding n-handler-threads to %u, setting "
|
|
|
|
|
"n-revalidator-threads to %u", n_handlers_requested,
|
|
|
|
|
n_revalidators_requested);
|
|
|
|
|
} else {
|
|
|
|
|
VLOG_INFO("Setting n-handler-threads to %u, setting "
|
|
|
|
|
"n-revalidator-threads to %u", n_handlers_requested,
|
|
|
|
|
n_revalidators_requested);
|
|
|
|
|
}
|
2020-01-09 12:49:44 -08:00
|
|
|
|
udpif_stop_threads(udpif, true);
|
2014-04-21 20:05:08 -07:00
|
|
|
|
}
|
2014-04-21 17:31:11 -07:00
|
|
|
|
|
2014-04-21 20:05:08 -07:00
|
|
|
|
if (!udpif->handlers && !udpif->revalidators) {
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
VLOG_INFO("Starting %u threads", n_handlers_requested +
|
|
|
|
|
n_revalidators_requested);
|
2014-05-09 14:42:30 -07:00
|
|
|
|
int error;
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
error = dpif_handlers_set(udpif->dpif, n_handlers_requested);
|
2014-05-09 14:42:30 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
VLOG_ERR("failed to configure handlers in dpif %s: %s",
|
|
|
|
|
dpif_name(udpif->dpif), ovs_strerror(error));
|
|
|
|
|
return;
|
|
|
|
|
}
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
|
udpif_start_threads(udpif, n_handlers_requested,
|
|
|
|
|
n_revalidators_requested);
|
2014-04-21 20:05:08 -07:00
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Notifies 'udpif' that something changed which may render previous
|
|
|
|
|
* xlate_actions() results invalid. */
|
|
|
|
|
void
|
|
|
|
|
udpif_revalidate(struct udpif *udpif)
|
|
|
|
|
{
|
2013-09-17 14:35:53 -07:00
|
|
|
|
seq_change(udpif->reval_seq);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-09-12 17:42:23 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Returns a seq which increments every time 'udpif' pulls stats from the
|
|
|
|
|
* datapath. Callers can use this to get a sense of when might be a good time
|
|
|
|
|
* to do periodic work which relies on relatively up to date statistics. */
|
|
|
|
|
struct seq *
|
|
|
|
|
udpif_dump_seq(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
return udpif->dump_seq;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-11-20 17:41:02 -08:00
|
|
|
|
void
|
|
|
|
|
udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
|
|
|
|
|
{
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
simap_increase(usage, "handlers", udpif->n_handlers);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
simap_increase(usage, "revalidators", udpif->n_revalidators);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
for (i = 0; i < N_UMAPS; i++) {
|
2014-05-05 15:44:40 +12:00
|
|
|
|
simap_increase(usage, "udpif keys", cmap_count(&udpif->ukeys[i].cmap));
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-11-20 17:41:02 -08:00
|
|
|
|
}
|
|
|
|
|
|
udpif: Bug fix udpif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not a problem, per se, merely an efficiency
issue. However, for every flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case the datapath
failed to execute the command. In addition to causing spurious log
messages, double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocations should only flush the local datapath.
Only megaflow on/off commands should flush all datapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
/* Remove flows from a single datapath. */
|
2013-09-24 13:39:56 -07:00
|
|
|
|
void
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush(struct udpif *udpif)
|
|
|
|
|
{
|
2021-07-16 06:17:34 -04:00
|
|
|
|
uint32_t n_handlers_ = udpif->n_handlers;
|
|
|
|
|
uint32_t n_revalidators_ = udpif->n_revalidators;
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
|
2020-01-09 12:49:44 -08:00
|
|
|
|
udpif_stop_threads(udpif, true);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
dpif_flow_flush(udpif->dpif);
|
2018-02-27 17:34:14 -08:00
|
|
|
|
udpif_start_threads(udpif, n_handlers_, n_revalidators_);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Removes all flows from all datapaths. */
|
|
|
|
|
static void
|
|
|
|
|
udpif_flush_all_datapaths(void)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush(udpif);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
|
2014-12-19 09:54:38 -08:00
|
|
|
|
static bool
|
|
|
|
|
udpif_use_ufid(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
bool enable;
|
|
|
|
|
|
|
|
|
|
atomic_read_relaxed(&enable_ufid, &enable);
|
2017-06-16 23:39:31 -07:00
|
|
|
|
return enable && udpif->backer->rt_support.ufid;
|
2014-12-19 09:54:38 -08:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-05-14 16:19:34 +09:00
|
|
|
|
/* Returns the number of flows currently installed in the datapath,
 * including hardware-offloaded flows when the dpif layer does not already
 * account for them.
 *
 * The result is cached in 'udpif->n_flows': it is refreshed at most once
 * every 100 ms, and only by one thread at a time (whoever wins the
 * trylock); all other callers read the cached value.  The timestamp and
 * count are read/written with relaxed atomics, so a reader may observe a
 * slightly stale count — acceptable since this is a statistic. */
static unsigned long
udpif_get_n_flows(struct udpif *udpif)
{
    long long int time, now;
    unsigned long flow_count;

    now = time_msec();
    atomic_read_relaxed(&udpif->n_flows_timestamp, &time);
    /* Refresh only if the cache is older than 100 ms and no other thread is
     * already refreshing it (trylock returning nonzero means "busy"). */
    if (time < now - 100 && !ovs_mutex_trylock(&udpif->n_flows_mutex)) {
        struct dpif_dp_stats stats;

        /* Store the timestamp before the (possibly slow) stats query so
         * concurrent callers fall through to the cached value meanwhile. */
        atomic_store_relaxed(&udpif->n_flows_timestamp, now);
        dpif_get_dp_stats(udpif->dpif, &stats);
        flow_count = stats.n_flows;

        if (!dpif_synced_dp_layers(udpif->dpif)) {
            /* If the dpif layer does not sync the flows, we need to include
             * the hardware offloaded flows separately. */
            uint64_t hw_flows;

            /* dpif_get_n_offloaded_flows() returns 0 on success. */
            if (!dpif_get_n_offloaded_flows(udpif->dpif, &hw_flows)) {
                flow_count += hw_flows;
            }
        }

        atomic_store_relaxed(&udpif->n_flows, flow_count);
        ovs_mutex_unlock(&udpif->n_flows_mutex);
    } else {
        /* Cache still fresh, or another thread is refreshing it: use the
         * last published value. */
        atomic_read_relaxed(&udpif->n_flows, &flow_count);
    }

    return flow_count;
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-05-20 21:50:19 -07:00
|
|
|
|
/* The upcall handler thread tries to read a batch of UPCALL_MAX_BATCH
 * upcalls from dpif, processes the batch and installs corresponding flows
 * in dpif.
 *
 * Thread entry point: 'arg' is the 'struct handler' this thread services.
 * Runs until the udpif's exit latch is set; always returns NULL. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;

    while (!latch_is_set(&handler->udpif->exit_latch)) {
        if (recv_upcalls(handler)) {
            /* At least one upcall was handled; more may already be queued,
             * so make the upcoming poll_block() return immediately. */
            poll_immediate_wake();
        } else {
            /* Nothing received: sleep until the dpif has upcalls for this
             * handler or the exit latch is raised. */
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
        }
        poll_block();
    }

    return NULL;
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
/* Receives and processes up to UPCALL_MAX_BATCH upcalls from the dpif on
 * behalf of 'handler'.  Each successfully parsed upcall is run through
 * process_upcall(); the whole batch is then passed to handle_upcalls(),
 * after which all per-upcall resources are released.
 *
 * Returns the number of upcalls that made it into the batch (0 if the
 * dpif had nothing to deliver). */
static size_t
recv_upcalls(struct handler *handler)
{
    struct udpif *udpif = handler->udpif;
    /* Stack stubs backing each receive buffer, to avoid heap allocation
     * for small upcalls. */
    uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8];
    struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
    struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
    struct upcall upcalls[UPCALL_MAX_BATCH];
    struct flow flows[UPCALL_MAX_BATCH];
    size_t n_upcalls, i;

    n_upcalls = 0;
    while (n_upcalls < UPCALL_MAX_BATCH) {
        struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
        struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
        struct upcall *upcall = &upcalls[n_upcalls];
        struct flow *flow = &flows[n_upcalls];
        unsigned int mru = 0;
        char *errorp = NULL;
        uint64_t hash = 0;
        int error;

        ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
                        sizeof recv_stubs[n_upcalls]);
        /* Nonzero return means no upcall was available: stop batching. */
        if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
            ofpbuf_uninit(recv_buf);
            break;
        }

        /* Translate the datapath flow key into a 'struct flow'; record how
         * well the key fit so callers can slow-path poorly-fitting flows. */
        upcall->fitness = odp_flow_key_to_flow(dupcall->key, dupcall->key_len,
                                               flow, NULL);
        if (upcall->fitness == ODP_FIT_ERROR) {
            goto free_dupcall;
        }

        /* Optional attributes: MRU (fragment handling) and the packet's
         * skb hash, echoed back to the datapath later. */
        if (dupcall->mru) {
            mru = nl_attr_get_u16(dupcall->mru);
        }

        if (dupcall->hash) {
            hash = nl_attr_get_u64(dupcall->hash);
        }

        error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
                               dupcall->type, dupcall->userdata, flow, mru,
                               &dupcall->ufid, PMD_ID_NULL, &errorp);
        if (error) {
            if (error == ENODEV) {
                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently. */
                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
                              dupcall->key_len, NULL, 0, NULL, 0,
                              &dupcall->ufid, PMD_ID_NULL, NULL);
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32"%s%s%s", flow->in_port.odp_port,
                             errorp ? " (" : "", errorp ? errorp : "",
                             errorp ? ")" : "");
            }
            /* 'errorp' was allocated by upcall_receive() on failure. */
            free(errorp);
            goto free_dupcall;
        }

        /* The upcall borrows key/ufid/etc. from 'dupcall', which stays
         * alive (in dupcalls[]) until the batch is handled below. */
        upcall->key = dupcall->key;
        upcall->key_len = dupcall->key_len;
        upcall->ufid = &dupcall->ufid;
        upcall->hash = hash;
        /* Original upcall PID, so nested upcalls triggered while executing
         * this packet go back to the same handler. */
        upcall->pid = dupcall->pid;

        upcall->out_tun_key = dupcall->out_tun_key;
        upcall->actions = dupcall->actions;

        /* Seed packet metadata from the datapath flow key, then re-extract
         * the flow from the actual packet contents. */
        pkt_metadata_from_flow(&dupcall->packet.md, flow);
        flow_extract(&dupcall->packet, flow);

        error = process_upcall(udpif, upcall,
                               &upcall->odp_actions, &upcall->wc);
        if (error) {
            goto cleanup;
        }

        n_upcalls++;
        continue;

cleanup:
        upcall_uninit(upcall);
free_dupcall:
        dp_packet_uninit(&dupcall->packet);
        ofpbuf_uninit(recv_buf);
    }

    if (n_upcalls) {
        handle_upcalls(handler->udpif, upcalls, n_upcalls);
        /* Batch handled: release every per-upcall resource. */
        for (i = 0; i < n_upcalls; i++) {
            dp_packet_uninit(&dupcalls[i].packet);
            ofpbuf_uninit(&recv_bufs[i]);
            upcall_uninit(&upcalls[i]);
        }
    }

    return n_upcalls;
}
|
|
|
|
|
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
static void
|
|
|
|
|
udpif_run_flow_rebalance(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
long long int now = 0;
|
|
|
|
|
|
|
|
|
|
/* Don't rebalance if OFFL_REBAL_INTVL_MSEC have not elapsed */
|
|
|
|
|
now = time_msec();
|
|
|
|
|
if (now < udpif->offload_rebalance_time + OFFL_REBAL_INTVL_MSEC) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!netdev_any_oor()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VLOG_DBG("Offload rebalance: Found OOR netdevs");
|
|
|
|
|
udpif->offload_rebalance_time = now;
|
|
|
|
|
udpif_flow_rebalance(udpif);
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void *
|
|
|
|
|
udpif_revalidator(void *arg)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
{
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* Used by all revalidators. */
|
2013-09-24 13:39:56 -07:00
|
|
|
|
struct revalidator *revalidator = arg;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
struct udpif *udpif = revalidator->udpif;
|
|
|
|
|
bool leader = revalidator == &udpif->revalidators[0];
|
|
|
|
|
|
|
|
|
|
/* Used only by the leader. */
|
|
|
|
|
long long int start_time = 0;
|
|
|
|
|
uint64_t last_reval_seq = 0;
|
|
|
|
|
size_t n_flows = 0;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
ovs-thread: Make caller provide thread name when creating a thread.
Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one. This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible. This also simplifies code, since two function calls become
only one.
This makes a few other changes to the thread creation function:
* Since it is no longer a direct wrapper around a pthread function,
rename it to avoid giving that impression.
* Remove 'pthread_attr_t *' param that every caller supplied as NULL.
* Change 'pthread *' parameter into a return value, for convenience.
The system-stats code hadn't set a thread name, so this fixes that issue.
This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself. Setting the thread
name earlier, as in this patch, avoids the problem.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-25 17:46:21 -07:00
|
|
|
|
revalidator->id = ovsthread_id_self();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
for (;;) {
|
2014-04-10 07:14:08 +00:00
|
|
|
|
if (leader) {
|
|
|
|
|
uint64_t reval_seq;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
recirc_run(); /* Recirculation cleanup. */
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
reval_seq = seq_read(udpif->reval_seq);
|
|
|
|
|
last_reval_seq = reval_seq;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
n_flows = udpif_get_n_flows(udpif);
|
|
|
|
|
udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
|
|
|
|
|
udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;
|
|
|
|
|
|
2015-08-29 06:09:45 +00:00
|
|
|
|
/* Only the leader checks the pause latch to prevent a race where
|
|
|
|
|
* some threads think it's false and proceed to block on
|
|
|
|
|
* reval_barrier and others think it's true and block indefinitely
|
|
|
|
|
* on the pause_barrier */
|
|
|
|
|
udpif->pause = latch_is_set(&udpif->pause_latch);
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* Only the leader checks the exit latch to prevent a race where
|
|
|
|
|
* some threads think it's true and exit and others think it's
|
|
|
|
|
* false and block indefinitely on the reval_barrier */
|
|
|
|
|
udpif->reval_exit = latch_is_set(&udpif->exit_latch);
|
|
|
|
|
|
|
|
|
|
start_time = time_msec();
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
if (!udpif->reval_exit && !udpif->pause) {
|
2014-10-06 11:14:08 +13:00
|
|
|
|
bool terse_dump;
|
|
|
|
|
|
2014-12-19 09:54:38 -08:00
|
|
|
|
terse_dump = udpif_use_ufid(udpif);
|
2017-06-13 18:03:49 +03:00
|
|
|
|
udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump,
|
|
|
|
|
NULL);
|
2023-01-23 12:03:29 +01:00
|
|
|
|
OVS_USDT_PROBE(udpif_revalidator, start_dump, udpif, n_flows);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
/* Wait for the leader to reach this point. */
|
2014-05-29 15:37:37 -07:00
|
|
|
|
ovs_barrier_block(&udpif->reval_barrier);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
if (udpif->pause) {
|
|
|
|
|
revalidator_pause(revalidator);
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
if (!udpif->reval_exit) {
|
|
|
|
|
/* The main thread resumed all validators, but the leader
|
|
|
|
|
* didn't start the dump, go to next iteration. */
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-08-29 06:09:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
if (udpif->reval_exit) {
|
|
|
|
|
break;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2014-04-10 07:14:08 +00:00
|
|
|
|
revalidate(revalidator);
|
|
|
|
|
|
|
|
|
|
/* Wait for all flows to have been dumped before we garbage collect. */
|
2014-05-29 15:37:37 -07:00
|
|
|
|
ovs_barrier_block(&udpif->reval_barrier);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
revalidator_sweep(revalidator);
|
|
|
|
|
|
|
|
|
|
/* Wait for all revalidators to finish garbage collection. */
|
2014-05-29 15:37:37 -07:00
|
|
|
|
ovs_barrier_block(&udpif->reval_barrier);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
|
|
|
|
if (leader) {
|
2014-08-29 10:34:53 -07:00
|
|
|
|
unsigned int flow_limit;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
long long int duration;
|
|
|
|
|
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
|
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
dpif_flow_dump_destroy(udpif->dump);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
seq_change(udpif->dump_seq);
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
if (netdev_is_offload_rebalance_policy_enabled()) {
|
|
|
|
|
udpif_run_flow_rebalance(udpif);
|
|
|
|
|
}
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
|
|
|
|
duration = MAX(time_msec() - start_time, 1);
|
|
|
|
|
udpif->dump_duration = duration;
|
|
|
|
|
if (duration > 2000) {
|
|
|
|
|
flow_limit /= duration / 1000;
|
2024-01-10 12:25:56 +01:00
|
|
|
|
COVERAGE_INC(upcall_flow_limit_scaled);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
} else if (duration > 1300) {
|
|
|
|
|
flow_limit = flow_limit * 3 / 4;
|
2024-01-10 12:25:56 +01:00
|
|
|
|
COVERAGE_INC(upcall_flow_limit_reduced);
|
2018-08-03 06:04:23 +05:30
|
|
|
|
} else if (duration < 1000 &&
|
|
|
|
|
flow_limit < n_flows * 1000 / duration) {
|
2014-04-10 07:14:08 +00:00
|
|
|
|
flow_limit += 1000;
|
2024-01-10 12:25:56 +01:00
|
|
|
|
COVERAGE_INC(upcall_flow_limit_grew);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
|
|
|
|
flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_store_relaxed(&udpif->flow_limit, flow_limit);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
if (duration > 2000) {
|
2024-01-10 12:25:55 +01:00
|
|
|
|
VLOG_WARN("Spent an unreasonably long %lldms dumping flows",
|
2014-04-10 07:14:08 +00:00
|
|
|
|
duration);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2023-01-23 12:03:29 +01:00
|
|
|
|
OVS_USDT_PROBE(udpif_revalidator, sweep_done, udpif, n_flows,
|
|
|
|
|
MIN(ofproto_max_idle, ofproto_max_revalidator));
|
|
|
|
|
|
2019-07-21 11:34:21 +03:00
|
|
|
|
poll_timer_wait_until(start_time + MIN(ofproto_max_idle,
|
|
|
|
|
ofproto_max_revalidator));
|
2014-04-10 07:14:08 +00:00
|
|
|
|
seq_wait(udpif->reval_seq, last_reval_seq);
|
|
|
|
|
latch_wait(&udpif->exit_latch);
|
2015-08-29 06:09:45 +00:00
|
|
|
|
latch_wait(&udpif->pause_latch);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
poll_block();
|
2016-09-27 12:55:46 -07:00
|
|
|
|
|
|
|
|
|
if (!latch_is_set(&udpif->pause_latch) &&
|
|
|
|
|
!latch_is_set(&udpif->exit_latch)) {
|
|
|
|
|
long long int now = time_msec();
|
|
|
|
|
/* Block again if we are woken up within 5ms of the last start
|
|
|
|
|
* time. */
|
|
|
|
|
start_time += 5;
|
|
|
|
|
|
|
|
|
|
if (now < start_time) {
|
|
|
|
|
poll_timer_wait_until(start_time);
|
|
|
|
|
latch_wait(&udpif->exit_latch);
|
|
|
|
|
latch_wait(&udpif->pause_latch);
|
|
|
|
|
poll_block();
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2018-11-01 00:51:41 +05:30
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
static enum upcall_type
|
2017-09-29 17:44:08 -07:00
|
|
|
|
classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
|
|
|
|
|
struct user_action_cookie *cookie)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
{
|
|
|
|
|
/* First look at the upcall type. */
|
2014-08-06 18:49:44 -07:00
|
|
|
|
switch (type) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
case DPIF_UC_ACTION:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case DPIF_UC_MISS:
|
|
|
|
|
return MISS_UPCALL;
|
|
|
|
|
|
|
|
|
|
case DPIF_N_UC_TYPES:
|
|
|
|
|
default:
|
2014-08-06 18:49:44 -07:00
|
|
|
|
VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, type);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return BAD_UPCALL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* "action" upcalls need a closer look. */
|
2014-08-06 18:49:44 -07:00
|
|
|
|
if (!userdata) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
VLOG_WARN_RL(&rl, "action upcall missing cookie");
|
|
|
|
|
return BAD_UPCALL;
|
|
|
|
|
}
|
2018-01-04 12:37:57 -08:00
|
|
|
|
|
|
|
|
|
size_t userdata_len = nl_attr_get_size(userdata);
|
2017-09-29 17:44:08 -07:00
|
|
|
|
if (userdata_len != sizeof *cookie) {
|
2013-11-25 23:38:48 -08:00
|
|
|
|
VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
|
2013-06-25 14:45:43 -07:00
|
|
|
|
userdata_len);
|
|
|
|
|
return BAD_UPCALL;
|
|
|
|
|
}
|
2017-09-29 17:44:08 -07:00
|
|
|
|
memcpy(cookie, nl_attr_get(userdata), sizeof *cookie);
|
|
|
|
|
if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return SFLOW_UPCALL;
|
2017-09-29 17:44:08 -07:00
|
|
|
|
} else if (cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
|
|
|
|
|
return SLOW_PATH_UPCALL;
|
|
|
|
|
} else if (cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return FLOW_SAMPLE_UPCALL;
|
2017-09-29 17:44:08 -07:00
|
|
|
|
} else if (cookie->type == USER_ACTION_COOKIE_IPFIX) {
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return IPFIX_UPCALL;
|
2017-07-05 15:17:52 -07:00
|
|
|
|
} else if (cookie->type == USER_ACTION_COOKIE_CONTROLLER) {
|
|
|
|
|
return CONTROLLER_UPCALL;
|
2013-06-25 14:45:43 -07:00
|
|
|
|
} else {
|
|
|
|
|
VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
|
2017-09-29 17:44:08 -07:00
|
|
|
|
" and size %"PRIuSIZE, cookie->type, userdata_len);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
return BAD_UPCALL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Calculates slow path actions for 'xout'. 'buf' must statically be
|
|
|
|
|
* initialized with at least 128 bytes of space. */
|
|
|
|
|
static void
|
|
|
|
|
compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
|
2017-10-03 17:31:34 -07:00
|
|
|
|
odp_port_t odp_in_port, ofp_port_t ofp_in_port,
|
2017-07-05 15:17:52 -07:00
|
|
|
|
struct ofpbuf *buf, uint32_t meter_id,
|
|
|
|
|
struct uuid *ofproto_uuid)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
2018-01-04 12:37:57 -08:00
|
|
|
|
struct user_action_cookie cookie;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
odp_port_t port;
|
|
|
|
|
uint32_t pid;
|
|
|
|
|
|
2019-07-25 18:11:13 +03:00
|
|
|
|
memset(&cookie, 0, sizeof cookie);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
|
2017-10-03 17:31:34 -07:00
|
|
|
|
cookie.ofp_in_port = ofp_in_port;
|
|
|
|
|
cookie.ofproto_uuid = *ofproto_uuid;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
cookie.slow_path.reason = xout->slow;
|
|
|
|
|
|
|
|
|
|
port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
|
|
|
|
|
? ODPP_NONE
|
|
|
|
|
: odp_in_port;
|
2018-09-25 15:14:13 -07:00
|
|
|
|
pid = dpif_port_get_pid(udpif->dpif, port);
|
2017-04-11 16:10:41 -07:00
|
|
|
|
|
|
|
|
|
size_t offset;
|
|
|
|
|
size_t ac_offset;
|
|
|
|
|
if (meter_id != UINT32_MAX) {
|
|
|
|
|
/* If slowpath meter is configured, generate clone(meter, userspace)
|
|
|
|
|
* action. */
|
|
|
|
|
offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SAMPLE);
|
|
|
|
|
nl_msg_put_u32(buf, OVS_SAMPLE_ATTR_PROBABILITY, UINT32_MAX);
|
|
|
|
|
ac_offset = nl_msg_start_nested(buf, OVS_SAMPLE_ATTR_ACTIONS);
|
|
|
|
|
nl_msg_put_u32(buf, OVS_ACTION_ATTR_METER, meter_id);
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-04 12:37:57 -08:00
|
|
|
|
odp_put_userspace_action(pid, &cookie, sizeof cookie,
|
2020-12-21 16:01:04 +01:00
|
|
|
|
ODPP_NONE, false, buf, NULL);
|
2017-04-11 16:10:41 -07:00
|
|
|
|
|
|
|
|
|
if (meter_id != UINT32_MAX) {
|
|
|
|
|
nl_msg_end_nested(buf, ac_offset);
|
|
|
|
|
nl_msg_end_nested(buf, offset);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-09-08 10:41:36 -07:00
|
|
|
|
/* If there is no error, the upcall must be destroyed with upcall_uninit()
|
|
|
|
|
* before quiescing, as the referred objects are guaranteed to exist only
|
|
|
|
|
* until the calling thread quiesces. Otherwise, do not call upcall_uninit()
|
|
|
|
|
* since the 'upcall->put_actions' remains uninitialized. */
|
2014-08-06 18:49:44 -07:00
|
|
|
|
static int
|
|
|
|
|
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
|
2015-02-22 03:21:09 -08:00
|
|
|
|
const struct dp_packet *packet, enum dpif_upcall_type type,
|
2014-09-24 15:24:39 +12:00
|
|
|
|
const struct nlattr *userdata, const struct flow *flow,
|
2015-02-26 15:52:34 -08:00
|
|
|
|
const unsigned int mru,
|
2022-09-01 17:42:49 +02:00
|
|
|
|
const ovs_u128 *ufid, const unsigned pmd_id,
|
|
|
|
|
char **errorp)
|
2014-08-06 18:49:44 -07:00
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
upcall->type = classify_upcall(type, userdata, &upcall->cookie);
|
|
|
|
|
if (upcall->type == BAD_UPCALL) {
|
|
|
|
|
return EAGAIN;
|
2017-10-03 17:31:34 -07:00
|
|
|
|
} else if (upcall->type == MISS_UPCALL) {
|
|
|
|
|
error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
|
2022-09-01 17:42:49 +02:00
|
|
|
|
&upcall->sflow, NULL, &upcall->ofp_in_port,
|
|
|
|
|
errorp);
|
2017-10-03 17:31:34 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
struct ofproto_dpif *ofproto
|
|
|
|
|
= ofproto_dpif_lookup_by_uuid(&upcall->cookie.ofproto_uuid);
|
|
|
|
|
if (!ofproto) {
|
2022-09-01 17:42:49 +02:00
|
|
|
|
if (errorp) {
|
|
|
|
|
*errorp = xstrdup("upcall could not find ofproto");
|
|
|
|
|
} else {
|
|
|
|
|
VLOG_INFO_RL(&rl, "upcall could not find ofproto");
|
|
|
|
|
}
|
2017-10-03 17:31:34 -07:00
|
|
|
|
return ENODEV;
|
|
|
|
|
}
|
|
|
|
|
upcall->ofproto = ofproto;
|
|
|
|
|
upcall->ipfix = ofproto->ipfix;
|
|
|
|
|
upcall->sflow = ofproto->sflow;
|
|
|
|
|
upcall->ofp_in_port = upcall->cookie.ofp_in_port;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
upcall->recirc = NULL;
|
|
|
|
|
upcall->have_recirc_ref = false;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
upcall->flow = flow;
|
|
|
|
|
upcall->packet = packet;
|
2014-09-24 15:24:39 +12:00
|
|
|
|
upcall->ufid = ufid;
|
2014-10-12 18:18:47 -07:00
|
|
|
|
upcall->pmd_id = pmd_id;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
|
|
|
|
|
sizeof upcall->odp_actions_stub);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
ofpbuf_init(&upcall->put_actions, 0);
|
|
|
|
|
|
|
|
|
|
upcall->xout_initialized = false;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
upcall->ukey_persists = false;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
upcall->ukey = NULL;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
upcall->key = NULL;
|
|
|
|
|
upcall->key_len = 0;
|
2015-02-26 15:52:34 -08:00
|
|
|
|
upcall->mru = mru;
|
dpif-netlink: Provide original upcall pid in 'execute' commands.
When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall. With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet. This allows us to send
packets from the same traffic flow through the same handler.
The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.
While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again. And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.
However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread. So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.
The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port. In real setups the
number of recirculations can go up to 4 or 5, sometimes more.
There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet. So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.
For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2. Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets. Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.
Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID. This way the upcall triggered during the
execution will go to the same handler. Packets will be enqueued to
the same socket and re-injected in the same order. This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows to eliminate
races between multiple handlers.
The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.
Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-07-08 13:34:02 +02:00
|
|
|
|
upcall->pid = 0;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2014-08-17 20:19:36 -07:00
|
|
|
|
upcall->out_tun_key = NULL;
|
2015-07-17 21:37:02 -07:00
|
|
|
|
upcall->actions = NULL;
|
2014-08-17 20:19:36 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-20 21:50:19 -07:00
|
|
|
|
static void
|
2014-08-06 18:49:44 -07:00
|
|
|
|
upcall_xlate(struct udpif *udpif, struct upcall *upcall,
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
|
2013-06-25 14:45:43 -07:00
|
|
|
|
{
|
2014-08-06 18:49:44 -07:00
|
|
|
|
struct dpif_flow_stats stats;
|
2018-02-28 16:32:27 -08:00
|
|
|
|
enum xlate_error xerr;
|
2014-05-22 10:53:27 -07:00
|
|
|
|
struct xlate_in xin;
|
2018-02-28 16:32:27 -08:00
|
|
|
|
struct ds output;
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
stats.n_packets = 1;
|
2015-02-22 03:21:09 -08:00
|
|
|
|
stats.n_bytes = dp_packet_size(upcall->packet);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
stats.used = time_msec();
|
|
|
|
|
stats.tcp_flags = ntohs(upcall->flow->tcp_flags);
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2016-09-14 16:51:27 -07:00
|
|
|
|
xlate_in_init(&xin, upcall->ofproto,
|
|
|
|
|
ofproto_dpif_get_tables_version(upcall->ofproto),
|
2017-10-03 17:31:34 -07:00
|
|
|
|
upcall->flow, upcall->ofp_in_port, NULL,
|
2015-07-31 13:34:16 -07:00
|
|
|
|
stats.tcp_flags, upcall->packet, wc, odp_actions);
|
2014-05-20 21:50:19 -07:00
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
if (upcall->type == MISS_UPCALL) {
|
2014-08-06 18:49:44 -07:00
|
|
|
|
xin.resubmit_stats = &stats;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (xin.frozen_state) {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* We may install a datapath flow only if we get a reference to the
|
|
|
|
|
* recirculation context (otherwise we could have recirculation
|
|
|
|
|
* upcalls using recirculation ID for which no context can be
|
|
|
|
|
* found). We may still execute the flow's actions even if we
|
|
|
|
|
* don't install the flow. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
upcall->recirc = recirc_id_node_from_state(xin.frozen_state);
|
2016-01-20 16:53:01 -08:00
|
|
|
|
upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(upcall->recirc);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2014-05-20 21:50:19 -07:00
|
|
|
|
} else {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* For non-miss upcalls, we are either executing actions (one of which
|
|
|
|
|
* is an userspace action) for an upcall, in which case the stats have
|
|
|
|
|
* already been taken care of, or there's a flow in the datapath which
|
|
|
|
|
* this packet was accounted to. Presumably the revalidators will deal
|
2014-05-20 21:50:19 -07:00
|
|
|
|
* with pushing its stats eventually. */
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
upcall->reval_seq = seq_read(udpif->reval_seq);
|
2016-06-09 13:18:45 -07:00
|
|
|
|
|
2018-02-28 16:32:27 -08:00
|
|
|
|
xerr = xlate_actions(&xin, &upcall->xout);
|
|
|
|
|
|
|
|
|
|
/* Translate again and log the ofproto trace for
|
|
|
|
|
* these two error types. */
|
|
|
|
|
if (xerr == XLATE_RECURSION_TOO_DEEP ||
|
|
|
|
|
xerr == XLATE_TOO_MANY_RESUBMITS) {
|
|
|
|
|
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
|
|
|
|
|
/* This is a huge log, so be conservative. */
|
|
|
|
|
if (!VLOG_DROP_WARN(&rll)) {
|
|
|
|
|
ds_init(&output);
|
|
|
|
|
ofproto_trace(upcall->ofproto, upcall->flow,
|
2022-08-26 16:48:53 +09:00
|
|
|
|
upcall->packet, NULL, 0, NULL, &output,
|
|
|
|
|
false);
|
2018-02-28 16:32:27 -08:00
|
|
|
|
VLOG_WARN("%s", ds_cstr(&output));
|
|
|
|
|
ds_destroy(&output);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-09 13:18:45 -07:00
|
|
|
|
if (wc) {
|
|
|
|
|
/* Convert the input port wildcard from OFP to ODP format. There's no
|
|
|
|
|
* real way to do this for arbitrary bitmasks since the numbering spaces
|
|
|
|
|
* aren't the same. However, flow translation always exact matches the
|
|
|
|
|
* whole thing, so we can do the same here. */
|
|
|
|
|
WC_MASK_FIELD(wc, in_port.odp_port);
|
|
|
|
|
}
|
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
upcall->xout_initialized = true;
|
|
|
|
|
|
ofproto-dpif-upcall: Slow path flows that datapath can't fully match.
In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:
- The flow lacks one or more fields that userspace expects to be there,
that is, the datapath doesn't understand or parse them but userspace
does. This is, for example, what would happen if current OVS
userspace, which understands and extracts TCP flags, were to be
paired with an older OVS kernel module, which does not. Internally
OVS uses the name ODP_FIT_TOO_LITTLE for this situation.
- The flow includes fields that userspace does not know about, that is,
the datapath understands and parses them but userspace does not.
This is, for example, what would happen if an old OVS userspace that
does not understand or extract TCP flags, were to be paired with a
recent OVS kernel module that does. Internally, OVS uses the name
ODP_FIT_TOO_MUCH for this situation.
The latter is not a big deal and OVS doesn't have to do much to cope with
it.
The former is more of a problem. When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path. Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.
Somehow, this nuance was lost a long time. From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE. Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it. I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix. This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.
CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>
2018-01-24 11:40:19 -08:00
|
|
|
|
if (upcall->fitness == ODP_FIT_TOO_LITTLE) {
|
|
|
|
|
upcall->xout.slow |= SLOW_MATCH;
|
|
|
|
|
}
|
2014-08-06 18:49:44 -07:00
|
|
|
|
if (!upcall->xout.slow) {
|
|
|
|
|
ofpbuf_use_const(&upcall->put_actions,
|
2015-07-31 13:34:16 -07:00
|
|
|
|
odp_actions->data, odp_actions->size);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
} else {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
/* upcall->put_actions already initialized by upcall_receive(). */
|
2018-09-25 15:14:13 -07:00
|
|
|
|
compose_slow_path(udpif, &upcall->xout,
|
2017-10-03 17:31:34 -07:00
|
|
|
|
upcall->flow->in_port.odp_port, upcall->ofp_in_port,
|
2017-07-05 15:17:52 -07:00
|
|
|
|
&upcall->put_actions,
|
|
|
|
|
upcall->ofproto->up.slowpath_meter_id,
|
2017-10-03 17:31:34 -07:00
|
|
|
|
&upcall->ofproto->uuid);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
}
|
2014-07-25 13:54:24 +12:00
|
|
|
|
|
2015-02-24 15:33:59 -08:00
|
|
|
|
/* This function is also called for slow-pathed flows. As we are only
|
|
|
|
|
* going to create new datapath flows for actual datapath misses, there is
|
|
|
|
|
* no point in creating a ukey otherwise. */
|
2017-09-29 17:44:08 -07:00
|
|
|
|
if (upcall->type == MISS_UPCALL) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
upcall->ukey = ukey_create_from_upcall(upcall, wc);
|
2015-02-24 15:33:59 -08:00
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-07-26 12:19:03 -07:00
|
|
|
|
static void
|
2014-08-06 18:49:44 -07:00
|
|
|
|
upcall_uninit(struct upcall *upcall)
|
2014-07-26 06:51:55 +00:00
|
|
|
|
{
|
2014-08-06 18:49:44 -07:00
|
|
|
|
if (upcall) {
|
|
|
|
|
if (upcall->xout_initialized) {
|
|
|
|
|
xlate_out_uninit(&upcall->xout);
|
|
|
|
|
}
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ofpbuf_uninit(&upcall->odp_actions);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
ofpbuf_uninit(&upcall->put_actions);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
if (upcall->ukey) {
|
|
|
|
|
if (!upcall->ukey_persists) {
|
|
|
|
|
ukey_delete__(upcall->ukey);
|
|
|
|
|
}
|
|
|
|
|
} else if (upcall->have_recirc_ref) {
|
|
|
|
|
/* The reference was transferred to the ukey if one was created. */
|
|
|
|
|
recirc_id_node_unref(upcall->recirc);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
}
|
2014-08-06 18:49:44 -07:00
|
|
|
|
}
|
2014-07-26 06:51:55 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-08-18 14:50:09 -07:00
|
|
|
|
/* If there are less flows than the limit, and this is a miss upcall which
|
|
|
|
|
*
|
|
|
|
|
* - Has no recirc_id, OR
|
|
|
|
|
* - Has a recirc_id and we can get a reference on the recirc ctx,
|
|
|
|
|
*
|
|
|
|
|
* Then we should install the flow (true). Otherwise, return false. */
|
|
|
|
|
static bool
|
|
|
|
|
should_install_flow(struct udpif *udpif, struct upcall *upcall)
|
|
|
|
|
{
|
|
|
|
|
unsigned int flow_limit;
|
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
if (upcall->type != MISS_UPCALL) {
|
2016-08-18 14:50:09 -07:00
|
|
|
|
return false;
|
|
|
|
|
} else if (upcall->recirc && !upcall->have_recirc_ref) {
|
2016-08-29 17:06:13 -04:00
|
|
|
|
VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
|
2016-08-18 14:50:09 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
|
|
|
|
|
if (udpif_get_n_flows(udpif) >= flow_limit) {
|
2020-04-20 19:13:42 +05:30
|
|
|
|
COVERAGE_INC(upcall_flow_limit_hit);
|
2020-09-29 17:07:31 -03:00
|
|
|
|
VLOG_WARN_RL(&rl,
|
|
|
|
|
"upcall: datapath reached the dynamic limit of %u flows.",
|
|
|
|
|
flow_limit);
|
2016-08-18 14:50:09 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2014-07-26 15:39:58 -07:00
|
|
|
|
static int
|
2015-02-22 03:21:09 -08:00
|
|
|
|
upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
|
2015-05-22 17:14:19 +01:00
|
|
|
|
unsigned pmd_id, enum dpif_upcall_type type,
|
2014-10-12 18:18:47 -07:00
|
|
|
|
const struct nlattr *userdata, struct ofpbuf *actions,
|
|
|
|
|
struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
|
2014-07-26 06:51:55 +00:00
|
|
|
|
{
|
2014-07-26 15:39:58 -07:00
|
|
|
|
struct udpif *udpif = aux;
|
|
|
|
|
struct upcall upcall;
|
|
|
|
|
bool megaflow;
|
|
|
|
|
int error;
|
2014-07-26 06:51:55 +00:00
|
|
|
|
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_read_relaxed(&enable_megaflows, &megaflow);
|
|
|
|
|
|
2014-07-26 15:39:58 -07:00
|
|
|
|
error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
|
2022-09-01 17:42:49 +02:00
|
|
|
|
flow, 0, ufid, pmd_id, NULL);
|
2014-07-26 15:39:58 -07:00
|
|
|
|
if (error) {
|
2014-09-08 10:41:36 -07:00
|
|
|
|
return error;
|
2014-07-26 06:51:55 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-02-26 11:10:11 +03:00
|
|
|
|
upcall.fitness = ODP_FIT_PERFECT;
|
2015-07-31 13:15:52 -07:00
|
|
|
|
error = process_upcall(udpif, &upcall, actions, wc);
|
2014-07-26 15:39:58 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2014-07-26 15:39:58 -07:00
|
|
|
|
if (upcall.xout.slow && put_actions) {
|
2015-03-02 17:29:44 -08:00
|
|
|
|
ofpbuf_put(put_actions, upcall.put_actions.data,
|
|
|
|
|
upcall.put_actions.size);
|
2014-07-26 15:39:58 -07:00
|
|
|
|
}
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2016-12-07 10:04:04 -08:00
|
|
|
|
if (OVS_UNLIKELY(!megaflow && wc)) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
flow_wildcards_init_for_packet(wc, flow);
|
2014-07-26 15:39:58 -07:00
|
|
|
|
}
|
2014-02-26 23:03:24 -08:00
|
|
|
|
|
2016-08-18 14:50:09 -07:00
|
|
|
|
if (!should_install_flow(udpif, &upcall)) {
|
2014-07-25 13:54:24 +12:00
|
|
|
|
error = ENOSPC;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
goto out;
|
2014-07-26 06:51:55 +00:00
|
|
|
|
}
|
2014-07-26 15:39:58 -07:00
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) {
|
|
|
|
|
error = ENOSPC;
|
|
|
|
|
}
|
2014-07-26 15:39:58 -07:00
|
|
|
|
out:
|
2014-07-25 13:54:24 +12:00
|
|
|
|
if (!error) {
|
|
|
|
|
upcall.ukey_persists = true;
|
|
|
|
|
}
|
2014-07-26 15:39:58 -07:00
|
|
|
|
upcall_uninit(&upcall);
|
|
|
|
|
return error;
|
2014-07-26 06:51:55 +00:00
|
|
|
|
}
|
2013-09-24 15:04:04 -07:00
|
|
|
|
|
2017-07-28 07:17:44 +01:00
|
|
|
|
static size_t
|
|
|
|
|
dpif_get_actions(struct udpif *udpif, struct upcall *upcall,
|
|
|
|
|
const struct nlattr **actions)
|
|
|
|
|
{
|
|
|
|
|
size_t actions_len = 0;
|
|
|
|
|
|
|
|
|
|
if (upcall->actions) {
|
|
|
|
|
/* Actions were passed up from datapath. */
|
|
|
|
|
*actions = nl_attr_get(upcall->actions);
|
|
|
|
|
actions_len = nl_attr_get_size(upcall->actions);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (actions_len == 0) {
|
|
|
|
|
/* Lookup actions in userspace cache. */
|
|
|
|
|
struct udpif_key *ukey = ukey_lookup(udpif, upcall->ufid,
|
|
|
|
|
upcall->pmd_id);
|
|
|
|
|
if (ukey) {
|
|
|
|
|
ukey_get_actions(ukey, actions, &actions_len);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return actions_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
|
dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
|
|
|
|
|
const struct flow *flow, enum upcall_type type,
|
|
|
|
|
void *upcall_data)
|
|
|
|
|
{
|
|
|
|
|
const struct nlattr *actions = NULL;
|
|
|
|
|
size_t actions_len = dpif_get_actions(udpif, upcall, &actions);
|
|
|
|
|
|
|
|
|
|
if (!actions || !actions_len) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
|
case SFLOW_UPCALL:
|
2018-01-09 19:54:31 +01:00
|
|
|
|
dpif_sflow_read_actions(flow, actions, actions_len, upcall_data, true);
|
2017-07-28 07:17:44 +01:00
|
|
|
|
break;
|
|
|
|
|
case FLOW_SAMPLE_UPCALL:
|
|
|
|
|
case IPFIX_UPCALL:
|
|
|
|
|
dpif_ipfix_read_actions(flow, actions, actions_len, upcall_data);
|
|
|
|
|
break;
|
|
|
|
|
case BAD_UPCALL:
|
|
|
|
|
case MISS_UPCALL:
|
2017-09-29 17:44:08 -07:00
|
|
|
|
case SLOW_PATH_UPCALL:
|
2017-07-05 15:17:52 -07:00
|
|
|
|
case CONTROLLER_UPCALL:
|
2017-07-28 07:17:44 +01:00
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return actions_len;
|
|
|
|
|
}
|
|
|
|
|
|
2014-07-26 12:19:03 -07:00
|
|
|
|
static int
|
2014-08-06 18:49:44 -07:00
|
|
|
|
process_upcall(struct udpif *udpif, struct upcall *upcall,
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
|
2014-07-26 06:51:55 +00:00
|
|
|
|
{
|
2015-02-22 03:21:09 -08:00
|
|
|
|
const struct dp_packet *packet = upcall->packet;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
const struct flow *flow = upcall->flow;
|
2017-07-28 07:17:44 +01:00
|
|
|
|
size_t actions_len = 0;
|
2013-09-19 11:03:47 -07:00
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
switch (upcall->type) {
|
2014-08-06 18:49:44 -07:00
|
|
|
|
case MISS_UPCALL:
|
2017-09-29 17:44:08 -07:00
|
|
|
|
case SLOW_PATH_UPCALL:
|
2015-07-31 13:15:52 -07:00
|
|
|
|
upcall_xlate(udpif, upcall, odp_actions, wc);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
return 0;
|
2013-09-24 15:04:04 -07:00
|
|
|
|
|
2014-07-26 06:51:55 +00:00
|
|
|
|
case SFLOW_UPCALL:
|
2014-08-06 18:49:44 -07:00
|
|
|
|
if (upcall->sflow) {
|
2015-07-17 21:37:02 -07:00
|
|
|
|
struct dpif_sflow_actions sflow_actions;
|
2017-07-28 07:17:44 +01:00
|
|
|
|
|
2015-07-17 21:37:02 -07:00
|
|
|
|
memset(&sflow_actions, 0, sizeof sflow_actions);
|
2017-07-28 07:17:44 +01:00
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
actions_len = dpif_read_actions(udpif, upcall, flow,
|
|
|
|
|
upcall->type, &sflow_actions);
|
2014-08-06 18:49:44 -07:00
|
|
|
|
dpif_sflow_received(upcall->sflow, packet, flow,
|
2017-09-29 17:44:08 -07:00
|
|
|
|
flow->in_port.odp_port, &upcall->cookie,
|
2015-07-17 21:37:02 -07:00
|
|
|
|
actions_len > 0 ? &sflow_actions : NULL);
|
2014-07-26 06:51:55 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2014-07-26 06:51:55 +00:00
|
|
|
|
case IPFIX_UPCALL:
|
|
|
|
|
case FLOW_SAMPLE_UPCALL:
|
2014-08-06 18:49:44 -07:00
|
|
|
|
if (upcall->ipfix) {
|
ipfix: Support tunnel information for Flow IPFIX.
Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
1) Create a new record in Flow_Sample_Collector_Set table:
'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
2) Add IPFIX configuration which is referred by corresponding
row in Flow_Sample_Collector_Set table:
'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
cache_active_timeout=60 cache_max_flows=13'
3) Add sample action to the flows:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,
obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
samping_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.
This patch mainly do three tasks:
1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
tunnel information. NXAST_SAMPLE2 action has a new added field
'sampling_port'.
2) Use 'other_configure: enable-tunnel-sampling' to enable or disable
exporting tunnel information.
3) If 'sampling_port' is equal to output port and output port is a tunnel
port, the translation of OpenFlow "sample" action should first emit
set(tunnel(...)), then the sample action itself. It makes sure the
egress tunnel information can be sampled.
4) Add a test of flow-based IPFIX for tunnel set.
How to test flow-based IPFIX:
1) Setup a test environment with two Linux host with Docker supported
2) Create a Docker container and a GRE tunnel port on each host
3) Use ovs-docker to add the container on the bridge
4) Listen on port 4739 on the collector machine and use wireshark to filter
'cflow' packets.
5) Configure flow-based IPFIX:
- 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
- 'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
other_config:enable-tunnel-sampling=true'
- 'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
Note: The in-port is container port. The output port and sampling_port
are both open flow port and the output port is a GRE tunnel port.
6) Ping from the container whose host enabled flow-based IPFIX.
7) Get the IPFIX template pakcets and IPFIX information packets.
Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2016-06-14 16:53:34 +08:00
|
|
|
|
struct flow_tnl output_tunnel_key;
|
2017-07-28 07:17:44 +01:00
|
|
|
|
struct dpif_ipfix_actions ipfix_actions;
|
2014-07-26 06:51:55 +00:00
|
|
|
|
|
2017-07-28 07:17:44 +01:00
|
|
|
|
memset(&ipfix_actions, 0, sizeof ipfix_actions);
|
2014-07-26 06:51:55 +00:00
|
|
|
|
|
ipfix: Support tunnel information for Flow IPFIX.
Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
1) Create a new record in Flow_Sample_Collector_Set table:
'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
2) Add IPFIX configuration which is referred by corresponding
row in Flow_Sample_Collector_Set table:
'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
cache_active_timeout=60 cache_max_flows=13'
3) Add sample action to the flows:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,
obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
samping_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.
This patch mainly do three tasks:
1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
tunnel information. NXAST_SAMPLE2 action has a new added field
'sampling_port'.
2) Use 'other_configure: enable-tunnel-sampling' to enable or disable
exporting tunnel information.
3) If 'sampling_port' is equal to output port and output port is a tunnel
port, the translation of OpenFlow "sample" action should first emit
set(tunnel(...)), then the sample action itself. It makes sure the
egress tunnel information can be sampled.
4) Add a test of flow-based IPFIX for tunnel set.
How to test flow-based IPFIX:
1) Setup a test environment with two Linux host with Docker supported
2) Create a Docker container and a GRE tunnel port on each host
3) Use ovs-docker to add the container on the bridge
4) Listen on port 4739 on the collector machine and use wireshark to filter
'cflow' packets.
5) Configure flow-based IPFIX:
- 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
- 'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
other_config:enable-tunnel-sampling=true'
- 'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
Note: The in-port is container port. The output port and sampling_port
are both open flow port and the output port is a GRE tunnel port.
6) Ping from the container whose host enabled flow-based IPFIX.
7) Get the IPFIX template pakcets and IPFIX information packets.
Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2016-06-14 16:53:34 +08:00
|
|
|
|
if (upcall->out_tun_key) {
|
2025-06-05 16:51:27 +02:00
|
|
|
|
if (odp_tun_key_from_attr(upcall->out_tun_key,
|
|
|
|
|
&output_tunnel_key,
|
|
|
|
|
NULL) != ODP_FIT_ERROR) {
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
ipfix: Support tunnel information for Flow IPFIX.
Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
1) Create a new record in Flow_Sample_Collector_Set table:
'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
2) Add IPFIX configuration which is referred by corresponding
row in Flow_Sample_Collector_Set table:
'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
cache_active_timeout=60 cache_max_flows=13'
3) Add sample action to the flows:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,
obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
samping_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.
This patch mainly do three tasks:
1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
tunnel information. NXAST_SAMPLE2 action has a new added field
'sampling_port'.
2) Use 'other_configure: enable-tunnel-sampling' to enable or disable
exporting tunnel information.
3) If 'sampling_port' is equal to output port and output port is a tunnel
port, the translation of OpenFlow "sample" action should first emit
set(tunnel(...)), then the sample action itself. It makes sure the
egress tunnel information can be sampled.
4) Add a test of flow-based IPFIX for tunnel set.
How to test flow-based IPFIX:
1) Setup a test environment with two Linux host with Docker supported
2) Create a Docker container and a GRE tunnel port on each host
3) Use ovs-docker to add the container on the bridge
4) Listen on port 4739 on the collector machine and use wireshark to filter
'cflow' packets.
5) Configure flow-based IPFIX:
- 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
- 'ovs-vsctl -- set Flow_Sample_Collector_Set
"Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
other_config:enable-tunnel-sampling=true'
- 'ovs-ofctl add-flow mybridge in_port=1,
actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
obs_point_id=456,sampling_port=3')',output:3'
Note: The in-port is container port. The output port and sampling_port
are both open flow port and the output port is a GRE tunnel port.
6) Ping from the container whose host enabled flow-based IPFIX.
7) Get the IPFIX template pakcets and IPFIX information packets.
Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2016-06-14 16:53:34 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-09-29 17:44:08 -07:00
|
|
|
|
actions_len = dpif_read_actions(udpif, upcall, flow,
|
|
|
|
|
upcall->type, &ipfix_actions);
|
2018-01-04 14:01:31 -08:00
|
|
|
|
if (upcall->type == IPFIX_UPCALL) {
|
|
|
|
|
dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
|
|
|
|
|
flow->in_port.odp_port,
|
|
|
|
|
upcall->cookie.ipfix.output_odp_port,
|
|
|
|
|
upcall->out_tun_key ?
|
|
|
|
|
&output_tunnel_key : NULL,
|
|
|
|
|
actions_len > 0 ?
|
|
|
|
|
&ipfix_actions: NULL);
|
|
|
|
|
} else {
|
|
|
|
|
/* The flow reflects exactly the contents of the packet.
|
|
|
|
|
* Sample the packet using it. */
|
|
|
|
|
dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
|
|
|
|
|
&upcall->cookie, flow->in_port.odp_port,
|
|
|
|
|
upcall->out_tun_key ?
|
|
|
|
|
&output_tunnel_key : NULL,
|
|
|
|
|
actions_len > 0 ? &ipfix_actions: NULL);
|
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-07-26 06:51:55 +00:00
|
|
|
|
break;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
|
2017-07-05 15:17:52 -07:00
|
|
|
|
case CONTROLLER_UPCALL:
|
|
|
|
|
{
|
|
|
|
|
struct user_action_cookie *cookie = &upcall->cookie;
|
|
|
|
|
|
|
|
|
|
if (cookie->controller.dont_send) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t recirc_id = cookie->controller.recirc_id;
|
|
|
|
|
if (!recirc_id) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const struct recirc_id_node *recirc_node
|
|
|
|
|
= recirc_id_node_find(recirc_id);
|
|
|
|
|
if (!recirc_node) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-18 23:16:22 -07:00
|
|
|
|
const struct frozen_state *state = &recirc_node->state;
|
|
|
|
|
|
2017-07-05 15:17:52 -07:00
|
|
|
|
struct ofproto_async_msg *am = xmalloc(sizeof *am);
|
|
|
|
|
*am = (struct ofproto_async_msg) {
|
|
|
|
|
.controller_id = cookie->controller.controller_id,
|
|
|
|
|
.oam = OAM_PACKET_IN,
|
|
|
|
|
.pin = {
|
|
|
|
|
.up = {
|
|
|
|
|
.base = {
|
|
|
|
|
.packet = xmemdup(dp_packet_data(packet),
|
|
|
|
|
dp_packet_size(packet)),
|
|
|
|
|
.packet_len = dp_packet_size(packet),
|
|
|
|
|
.reason = cookie->controller.reason,
|
2017-10-18 23:16:22 -07:00
|
|
|
|
.table_id = state->table_id,
|
2017-07-05 15:17:52 -07:00
|
|
|
|
.cookie = get_32aligned_be64(
|
|
|
|
|
&cookie->controller.rule_cookie),
|
|
|
|
|
.userdata = (recirc_node->state.userdata_len
|
|
|
|
|
? xmemdup(recirc_node->state.userdata,
|
|
|
|
|
recirc_node->state.userdata_len)
|
|
|
|
|
: NULL),
|
|
|
|
|
.userdata_len = recirc_node->state.userdata_len,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
.max_len = cookie->controller.max_len,
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
|
2017-10-18 23:16:22 -07:00
|
|
|
|
if (cookie->controller.continuation) {
|
|
|
|
|
am->pin.up.stack = (state->stack_size
|
|
|
|
|
? xmemdup(state->stack, state->stack_size)
|
|
|
|
|
: NULL),
|
|
|
|
|
am->pin.up.stack_size = state->stack_size,
|
|
|
|
|
am->pin.up.mirrors = state->mirrors,
|
|
|
|
|
am->pin.up.conntracked = state->conntracked,
|
|
|
|
|
am->pin.up.actions = (state->ofpacts_len
|
|
|
|
|
? xmemdup(state->ofpacts,
|
|
|
|
|
state->ofpacts_len) : NULL),
|
|
|
|
|
am->pin.up.actions_len = state->ofpacts_len,
|
|
|
|
|
am->pin.up.action_set = (state->action_set_len
|
|
|
|
|
? xmemdup(state->action_set,
|
|
|
|
|
state->action_set_len)
|
|
|
|
|
: NULL),
|
|
|
|
|
am->pin.up.action_set_len = state->action_set_len,
|
|
|
|
|
am->pin.up.bridge = upcall->ofproto->uuid;
|
2019-06-21 10:51:23 -07:00
|
|
|
|
am->pin.up.odp_port = upcall->packet->md.in_port.odp_port;
|
2017-10-18 23:16:22 -07:00
|
|
|
|
}
|
|
|
|
|
|
2017-07-05 15:17:52 -07:00
|
|
|
|
/* We don't want to use the upcall 'flow', since it may be
|
|
|
|
|
* more specific than the point at which the "controller"
|
|
|
|
|
* action was specified. */
|
|
|
|
|
struct flow frozen_flow;
|
|
|
|
|
|
|
|
|
|
frozen_flow = *flow;
|
2017-10-18 23:16:22 -07:00
|
|
|
|
if (!state->conntracked) {
|
2017-07-05 15:17:52 -07:00
|
|
|
|
flow_clear_conntrack(&frozen_flow);
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-09 11:37:38 -07:00
|
|
|
|
frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
|
|
|
|
|
&frozen_flow);
|
2017-07-05 15:17:52 -07:00
|
|
|
|
flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);
|
|
|
|
|
|
|
|
|
|
ofproto_dpif_send_async_msg(upcall->ofproto, am);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2014-07-26 06:51:55 +00:00
|
|
|
|
case BAD_UPCALL:
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-09-24 15:04:04 -07:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
return EAGAIN;
|
2014-02-26 23:03:24 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Executes the datapath operations accumulated for a batch of 'n_upcalls'
 * upcalls in 'upcalls': a flow put for each upcall whose ukey should be (and
 * could be) installed, plus a packet execution for each upcall that produced
 * datapath actions.  After the batch executes, updates each put ukey's state
 * according to the outcome of its operation. */
static void
handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
               size_t n_upcalls)
{
    struct dpif_op *opsp[UPCALL_MAX_BATCH * 2];
    struct ukey_op ops[UPCALL_MAX_BATCH * 2];
    size_t n_ops, n_opsp, i;

    /* Handle the packets individually in order of arrival.
     *
     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, SLOW_BFD, and SLOW_LLDP,
     *     translation is what processes received packets for these
     *     protocols.
     *
     *   - For SLOW_ACTION, translation executes the actions directly.
     *
     * The loop fills 'ops' with an array of operations to execute in the
     * datapath. */
    n_ops = 0;
    for (i = 0; i < n_upcalls; i++) {
        struct upcall *upcall = &upcalls[i];
        const struct dp_packet *packet = upcall->packet;
        struct ukey_op *op;

        if (should_install_flow(udpif, upcall)) {
            struct udpif_key *ukey = upcall->ukey;

            /* Once installed, the ukey outlives this batch; mark it
             * persistent so it is not torn down with the upcall. */
            if (ukey_install(udpif, ukey)) {
                upcall->ukey_persists = true;
                put_op_init(&ops[n_ops++], ukey, DPIF_FP_CREATE);
            }
        }

        if (upcall->odp_actions.size) {
            /* Re-inject the original packet through the datapath with the
             * translated actions. */
            op = &ops[n_ops++];
            op->ukey = NULL;
            op->dop.type = DPIF_OP_EXECUTE;
            op->dop.execute.packet = CONST_CAST(struct dp_packet *, packet);
            op->dop.execute.flow = upcall->flow;
            odp_key_to_dp_packet(upcall->key, upcall->key_len,
                                 op->dop.execute.packet);
            op->dop.execute.actions = upcall->odp_actions.data;
            op->dop.execute.actions_len = upcall->odp_actions.size;
            op->dop.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
            op->dop.execute.probe = false;
            op->dop.execute.mtu = upcall->mru;
            /* Echo the hash received with the upcall back to the datapath,
             * so the kernel does not have to recompute it (e.g. for VXLAN
             * UDP source port selection). */
            op->dop.execute.hash = upcall->hash;
            /* Provide the original upcall PID so that any upcall triggered
             * while executing this packet (e.g. after recirculation) is
             * delivered to this same handler, preserving packet order. */
            op->dop.execute.upcall_pid = upcall->pid;
        }
    }

    /* Execute batch. */
    n_opsp = 0;
    for (i = 0; i < n_ops; i++) {
        opsp[n_opsp++] = &ops[i].dop;
    }
    dpif_operate(udpif->dpif, opsp, n_opsp, DPIF_OFFLOAD_AUTO);
    for (i = 0; i < n_ops; i++) {
        struct udpif_key *ukey = ops[i].ukey;

        if (ukey) {
            ovs_mutex_lock(&ukey->mutex);
            if (ops[i].dop.error) {
                /* The flow put failed, so this ukey's flow is not in the
                 * datapath. */
                transition_ukey(ukey, UKEY_EVICTED);
            } else if (ukey->state < UKEY_OPERATIONAL) {
                transition_ukey(ukey, UKEY_OPERATIONAL);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }
    }
}
|
|
|
|
|
|
2014-09-24 15:24:39 +12:00
|
|
|
|
static uint32_t
|
2016-02-03 14:31:43 +03:00
|
|
|
|
get_ukey_hash(const ovs_u128 *ufid, const unsigned pmd_id)
|
2014-09-24 15:24:39 +12:00
|
|
|
|
{
|
2016-02-03 14:31:43 +03:00
|
|
|
|
return hash_2words(ufid->u32[0], pmd_id);
|
2014-09-24 15:24:39 +12:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Looks up the ukey for the flow identified by 'ufid' and 'pmd_id' in
 * 'udpif's shared ukey maps.  Returns the ukey on success, otherwise NULL.
 *
 * The lookup is over a cmap, so it does not require the umap mutex; only the
 * hash (which folds in 'pmd_id') and the UFID are compared. */
static struct udpif_key *
ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid, const unsigned pmd_id)
{
    struct udpif_key *ukey;
    int idx = get_ukey_hash(ufid, pmd_id) % N_UMAPS;
    struct cmap *cmap = &udpif->ukeys[idx].cmap;

    CMAP_FOR_EACH_WITH_HASH (ukey, cmap_node,
                             get_ukey_hash(ufid, pmd_id), cmap) {
        if (ovs_u128_equals(ukey->ufid, *ufid)) {
            return ukey;
        }
    }
    return NULL;
}
|
|
|
|
|
|
2015-08-12 14:50:54 -07:00
|
|
|
|
/* Provides safe lockless access of RCU protected 'ukey->actions'. Callers may
|
|
|
|
|
* alternatively access the field directly if they take 'ukey->mutex'. */
|
|
|
|
|
static void
|
|
|
|
|
ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size)
|
|
|
|
|
{
|
|
|
|
|
const struct ofpbuf *buf = ovsrcu_get(struct ofpbuf *, &ukey->actions);
|
|
|
|
|
*actions = buf->data;
|
|
|
|
|
*size = buf->size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions)
|
|
|
|
|
{
|
2018-04-19 13:24:06 +02:00
|
|
|
|
struct ofpbuf *old_actions = ovsrcu_get_protected(struct ofpbuf *,
|
|
|
|
|
&ukey->actions);
|
|
|
|
|
|
|
|
|
|
if (old_actions) {
|
|
|
|
|
ovsrcu_postpone(ofpbuf_delete, old_actions);
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-12 14:50:54 -07:00
|
|
|
|
ovsrcu_set(&ukey->actions, ofpbuf_clone(actions));
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
/* Allocates and initializes a new ukey from its raw components.
 *
 * Copies the netlink-formatted flow 'key'/'mask' into the ukey's internal
 * buffers and clones 'actions'.  'reval_seq' seeds the revalidation sequence
 * number and 'used' seeds the last-used time in the stats.  If 'xout' is
 * nonnull, takes ownership of its action recirculation id references.
 *
 * The returned ukey is in UKEY_CREATED state; the caller owns it until it is
 * successfully installed into a umap. */
static struct udpif_key *
ukey_create__(const struct nlattr *key, size_t key_len,
              const struct nlattr *mask, size_t mask_len,
              bool ufid_present, const ovs_u128 *ufid,
              const unsigned pmd_id, const struct ofpbuf *actions,
              uint64_t reval_seq, long long int used,
              uint32_t key_recirc_id, struct xlate_out *xout)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct udpif_key *ukey = xmalloc(sizeof *ukey);

    /* Flow key, mask and identity. */
    memcpy(&ukey->keybuf, key, key_len);
    ukey->key = &ukey->keybuf.nla;
    ukey->key_len = key_len;
    memcpy(&ukey->maskbuf, mask, mask_len);
    ukey->mask = &ukey->maskbuf.nla;
    ukey->mask_len = mask_len;
    ukey->ufid_present = ufid_present;
    ukey->ufid = *ufid;
    ukey->pmd_id = pmd_id;
    ukey->hash = get_ukey_hash(&ukey->ufid, pmd_id);

    /* Actions are RCU-protected; initialize the pointer before setting. */
    ovsrcu_init(&ukey->actions, NULL);
    ukey_set_actions(ukey, actions);

    /* Bookkeeping for the revalidator state machine. */
    ovs_mutex_init(&ukey->mutex);
    ukey->dump_seq = 0;     /* Not yet dumped */
    ukey->reval_seq = reval_seq;
    ukey->state = UKEY_CREATED;
    ukey->state_thread = ovsthread_id_self();
    ukey->state_where = OVS_SOURCE_LOCATOR;
    ukey->created = ukey->flow_time = time_msec();
    ukey->missed_dumps = 0;
    memset(&ukey->stats, 0, sizeof ukey->stats);
    ukey->stats.used = used;
    ukey->dp_layer = NULL;
    ukey->xcache = NULL;

    /* Hardware-offload tracking state. */
    ukey->offloaded = false;
    ukey->in_netdev = NULL;
    ukey->flow_packets = ukey->flow_backlog_packets = 0;

    ukey->key_recirc_id = key_recirc_id;
    recirc_refs_init(&ukey->recircs);
    if (xout) {
        /* Take ownership of the action recirc id references. */
        recirc_refs_swap(&ukey->recircs, &xout->recircs);
    }

    return ukey;
}
|
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
/* Builds a new ukey from the information carried by 'upcall'.
 *
 * Uses the netlink-formatted flow key from the upcall if the datapath
 * provided one, otherwise serializes the upcall's flow.  A mask is
 * serialized from 'wc' only when megaflows are enabled and 'wc' is nonnull.
 * Returns the new ukey, owned by the caller. */
static struct udpif_key *
ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc)
{
    struct odputil_keybuf keystub, maskstub;
    struct ofpbuf keybuf, maskbuf;
    bool megaflow;
    struct odp_flow_key_parms odp_parms = {
        .flow = upcall->flow,
        .mask = wc ? &wc->masks : NULL,
    };

    odp_parms.support = upcall->ofproto->backer->rt_support.odp;
    if (upcall->key_len) {
        ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len);
    } else {
        /* dpif-netdev doesn't provide a netlink-formatted flow key in the
         * upcall, so convert the upcall's flow here. */
        ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub);
        odp_flow_key_from_flow(&odp_parms, &keybuf);
    }

    atomic_read_relaxed(&enable_megaflows, &megaflow);
    ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub);
    if (megaflow && wc) {
        /* The mask serialization needs the key buffer for context. */
        odp_parms.key_buf = &keybuf;
        odp_flow_key_from_mask(&odp_parms, &maskbuf);
    }

    return ukey_create__(keybuf.data, keybuf.size, maskbuf.data, maskbuf.size,
                         true, upcall->ufid, upcall->pmd_id,
                         &upcall->put_actions, upcall->reval_seq, 0,
                         upcall->have_recirc_ref ? upcall->recirc->id : 0,
                         &upcall->xout);
}
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
/* Builds a new ukey in '*ukey' from a flow dumped from the datapath.
 *
 * If the dump did not include the key or the actions, fetches the full flow
 * from the datapath first.  Returns 0 on success, or a positive errno value
 * on failure.  In particular, returns EINVAL for flows that depend on OVS
 * userspace recirculation state (a nonzero recirc_id in the key, or any
 * recirculation action), so that such stale flows get deleted after an OVS
 * restart. */
static int
ukey_create_from_dpif_flow(const struct udpif *udpif,
                           const struct dpif_flow *flow,
                           struct udpif_key **ukey)
{
    struct dpif_flow full_flow;
    struct ofpbuf actions;
    uint64_t reval_seq;
    uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
    const struct nlattr *a;
    unsigned int left;

    if (!flow->key_len || !flow->actions_len) {
        struct ofpbuf buf;
        int err;

        /* If the key or actions were not provided by the datapath, fetch the
         * full flow. */
        ofpbuf_use_stack(&buf, &stub, sizeof stub);
        err = dpif_flow_get(udpif->dpif, flow->key, flow->key_len,
                            flow->ufid_present ? &flow->ufid : NULL,
                            flow->pmd_id, &buf, &full_flow);
        if (err) {
            return err;
        }
        flow = &full_flow;
    }

    /* Check the flow actions for recirculation action.  As recirculation
     * relies on OVS userspace internal state, we need to delete all old
     * datapath flows with either a non-zero recirc_id in the key, or any
     * recirculation actions upon OVS restart. */
    NL_ATTR_FOR_EACH (a, left, flow->key, flow->key_len) {
        if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
            && nl_attr_get_u32(a) != 0) {
            return EINVAL;
        }
    }
    NL_ATTR_FOR_EACH (a, left, flow->actions, flow->actions_len) {
        if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
            return EINVAL;
        }
    }

    reval_seq = seq_read(udpif->reval_seq) - 1; /* Ensure revalidation. */
    ofpbuf_use_const(&actions, flow->actions, flow->actions_len);
    *ukey = ukey_create__(flow->key, flow->key_len,
                          flow->mask, flow->mask_len, flow->ufid_present,
                          &flow->ufid, flow->pmd_id, &actions,
                          reval_seq, flow->stats.used, 0, NULL);

    return 0;
}
|
|
|
|
|
|
upcall: Replace ukeys for deleted flows.
If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.
However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.
To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-08-31 11:06:05 -07:00
|
|
|
|
/* Attempts to replace 'old_ukey' in 'umap' with 'new_ukey'.
 *
 * Replacement only happens when 'old_ukey' is in UKEY_EVICTED state and its
 * mutex can be acquired without blocking; otherwise a coverage counter is
 * bumped and nothing changes.  Returns true on success with
 * 'new_ukey->mutex' held (per OVS_TRY_LOCK); false otherwise. */
static bool
try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
                 struct udpif_key *new_ukey)
    OVS_REQUIRES(umap->mutex)
    OVS_TRY_LOCK(true, new_ukey->mutex)
{
    bool replaced = false;

    if (!ovs_mutex_trylock(&old_ukey->mutex)) {
        if (old_ukey->state == UKEY_EVICTED) {
            /* The flow was deleted during the current revalidator dump,
             * but its ukey won't be fully cleaned up until the sweep phase.
             * In the mean time, we are receiving upcalls for this traffic.
             * Expedite the (new) flow install by replacing the ukey. */
            ovs_mutex_lock(&new_ukey->mutex);
            cmap_replace(&umap->cmap, &old_ukey->cmap_node,
                         &new_ukey->cmap_node, new_ukey->hash);
            /* Carry over the dump sequence so that a duplicate dump of the
             * old flow within this same dump phase is not treated as new
             * and does not attribute the same stats twice. */
            new_ukey->dump_seq = old_ukey->dump_seq;
            /* Free the old ukey only after an RCU grace period; lockless
             * readers may still hold a reference to it. */
            ovsrcu_postpone(ukey_delete__, old_ukey);
            transition_ukey(old_ukey, UKEY_DELETED);
            transition_ukey(new_ukey, UKEY_VISIBLE);
            replaced = true;
            COVERAGE_INC(upcall_ukey_replace);
        } else {
            COVERAGE_INC(handler_duplicate_upcall);
        }
        ovs_mutex_unlock(&old_ukey->mutex);
    } else {
        COVERAGE_INC(ukey_replace_contention);
    }

    return replaced;
}
|
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
/* Attempts to insert a ukey into the shared ukey maps.
|
|
|
|
|
*
|
|
|
|
|
* On success, returns true, installs the ukey and returns it in a locked
|
|
|
|
|
* state. Otherwise, returns false. */
|
|
|
|
|
static bool
|
2016-08-31 11:06:04 -07:00
|
|
|
|
ukey_install__(struct udpif *udpif, struct udpif_key *new_ukey)
|
2014-07-25 13:54:24 +12:00
|
|
|
|
OVS_TRY_LOCK(true, new_ukey->mutex)
|
|
|
|
|
{
|
|
|
|
|
struct umap *umap;
|
|
|
|
|
struct udpif_key *old_ukey;
|
|
|
|
|
uint32_t idx;
|
|
|
|
|
bool locked = false;
|
|
|
|
|
|
|
|
|
|
idx = new_ukey->hash % N_UMAPS;
|
|
|
|
|
umap = &udpif->ukeys[idx];
|
|
|
|
|
ovs_mutex_lock(&umap->mutex);
|
2016-02-03 14:31:43 +03:00
|
|
|
|
old_ukey = ukey_lookup(udpif, &new_ukey->ufid, new_ukey->pmd_id);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
if (old_ukey) {
|
|
|
|
|
/* Uncommon case: A ukey is already installed with the same UFID. */
|
|
|
|
|
if (old_ukey->key_len == new_ukey->key_len
|
|
|
|
|
&& !memcmp(old_ukey->key, new_ukey->key, new_ukey->key_len)) {
|
upcall: Replace ukeys for deleted flows.
If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.
However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.
To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-08-31 11:06:05 -07:00
|
|
|
|
locked = try_ukey_replace(umap, old_ukey, new_ukey);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
} else {
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
|
2014-09-24 16:26:35 +12:00
|
|
|
|
odp_format_ufid(&old_ukey->ufid, &ds);
|
|
|
|
|
ds_put_cstr(&ds, " ");
|
2014-07-25 13:54:24 +12:00
|
|
|
|
odp_flow_key_format(old_ukey->key, old_ukey->key_len, &ds);
|
|
|
|
|
ds_put_cstr(&ds, "\n");
|
2014-09-24 16:26:35 +12:00
|
|
|
|
odp_format_ufid(&new_ukey->ufid, &ds);
|
|
|
|
|
ds_put_cstr(&ds, " ");
|
2014-07-25 13:54:24 +12:00
|
|
|
|
odp_flow_key_format(new_ukey->key, new_ukey->key_len, &ds);
|
|
|
|
|
|
|
|
|
|
VLOG_WARN_RL(&rl, "Conflicting ukey for flows:\n%s", ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
ovs_mutex_lock(&new_ukey->mutex);
|
|
|
|
|
cmap_insert(&umap->cmap, &new_ukey->cmap_node, new_ukey->hash);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
transition_ukey(new_ukey, UKEY_VISIBLE);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
locked = true;
|
|
|
|
|
}
|
|
|
|
|
ovs_mutex_unlock(&umap->mutex);
|
|
|
|
|
|
|
|
|
|
return locked;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2017-04-26 18:03:12 -07:00
|
|
|
|
transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
|
|
|
|
|
const char *where)
|
2016-08-31 11:06:04 -07:00
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
2014-07-25 13:54:24 +12:00
|
|
|
|
{
|
2017-04-26 18:03:12 -07:00
|
|
|
|
if (dst < ukey->state) {
|
|
|
|
|
VLOG_ABORT("Invalid ukey transition %d->%d (last transitioned from "
|
|
|
|
|
"thread %u at %s)", ukey->state, dst, ukey->state_thread,
|
|
|
|
|
ukey->state_where);
|
|
|
|
|
}
|
2017-01-10 15:54:03 -08:00
|
|
|
|
if (ukey->state == dst && dst == UKEY_OPERATIONAL) {
|
2016-08-31 11:06:04 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Valid state transitions:
|
|
|
|
|
* UKEY_CREATED -> UKEY_VISIBLE
|
|
|
|
|
* Ukey is now visible in the umap.
|
|
|
|
|
* UKEY_VISIBLE -> UKEY_OPERATIONAL
|
|
|
|
|
* A handler has installed the flow, and the flow is in the datapath.
|
|
|
|
|
* UKEY_VISIBLE -> UKEY_EVICTING
|
|
|
|
|
* A handler installs the flow, then revalidator sweeps the ukey before
|
|
|
|
|
* the flow is dumped. Most likely the flow was installed; start trying
|
|
|
|
|
* to delete it.
|
|
|
|
|
* UKEY_VISIBLE -> UKEY_EVICTED
|
|
|
|
|
* A handler attempts to install the flow, but the datapath rejects it.
|
|
|
|
|
* Consider that the datapath has already destroyed it.
|
2023-07-01 05:11:16 +00:00
|
|
|
|
* UKEY_OPERATIONAL -> UKEY_INCONSISTENT
|
|
|
|
|
* A revalidator modifies the flow with error returns.
|
|
|
|
|
* UKEY_INCONSISTENT -> UKEY_EVICTING
|
|
|
|
|
* A revalidator decides to evict the datapath flow.
|
2016-08-31 11:06:04 -07:00
|
|
|
|
* UKEY_OPERATIONAL -> UKEY_EVICTING
|
|
|
|
|
* A revalidator decides to evict the datapath flow.
|
|
|
|
|
* UKEY_EVICTING -> UKEY_EVICTED
|
|
|
|
|
* A revalidator has evicted the datapath flow.
|
|
|
|
|
* UKEY_EVICTED -> UKEY_DELETED
|
|
|
|
|
* A revalidator has removed the ukey from the umap and is deleting it.
|
|
|
|
|
*/
|
2023-07-01 05:11:16 +00:00
|
|
|
|
if (ukey->state == dst - 1 ||
|
|
|
|
|
(ukey->state == UKEY_VISIBLE && dst < UKEY_DELETED) ||
|
|
|
|
|
(ukey->state == UKEY_OPERATIONAL && dst == UKEY_EVICTING)) {
|
2016-08-31 11:06:04 -07:00
|
|
|
|
ukey->state = dst;
|
|
|
|
|
} else {
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
|
2016-08-31 11:06:04 -07:00
|
|
|
|
odp_format_ufid(&ukey->ufid, &ds);
|
|
|
|
|
VLOG_WARN_RL(&rl, "Invalid state transition for ukey %s: %d -> %d",
|
|
|
|
|
ds_cstr(&ds), ukey->state, dst);
|
|
|
|
|
ds_destroy(&ds);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
}
|
2017-04-26 18:03:12 -07:00
|
|
|
|
ukey->state_thread = ovsthread_id_self();
|
|
|
|
|
ukey->state_where = where;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
ukey_install(struct udpif *udpif, struct udpif_key *ukey)
|
|
|
|
|
{
|
2016-08-31 11:06:04 -07:00
|
|
|
|
bool installed;
|
|
|
|
|
|
|
|
|
|
installed = ukey_install__(udpif, ukey);
|
|
|
|
|
if (installed) {
|
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return installed;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Searches for a ukey in 'udpif->ukeys' that matches 'flow' and attempts to
|
|
|
|
|
* lock the ukey. If the ukey does not exist, create it.
|
2014-04-10 07:14:08 +00:00
|
|
|
|
*
|
2014-10-06 11:14:08 +13:00
|
|
|
|
* Returns 0 on success, setting *result to the matching ukey and returning it
|
|
|
|
|
* in a locked state. Otherwise, returns an errno and clears *result. EBUSY
|
|
|
|
|
* indicates that another thread is handling this flow. Other errors indicate
|
|
|
|
|
* an unexpected condition creating a new ukey.
|
|
|
|
|
*
|
|
|
|
|
* *error is an output parameter provided to appease the threadsafety analyser,
|
|
|
|
|
* and its value matches the return value. */
|
2014-07-25 13:54:24 +12:00
|
|
|
|
static int
|
|
|
|
|
ukey_acquire(struct udpif *udpif, const struct dpif_flow *flow,
|
2014-10-06 11:14:08 +13:00
|
|
|
|
struct udpif_key **result, int *error)
|
|
|
|
|
OVS_TRY_LOCK(0, (*result)->mutex)
|
2014-04-10 07:14:08 +00:00
|
|
|
|
{
|
2014-06-04 09:59:23 +00:00
|
|
|
|
struct udpif_key *ukey;
|
2014-10-06 11:14:08 +13:00
|
|
|
|
int retval;
|
2014-06-04 09:59:23 +00:00
|
|
|
|
|
2016-02-03 14:31:43 +03:00
|
|
|
|
ukey = ukey_lookup(udpif, &flow->ufid, flow->pmd_id);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
if (ukey) {
|
2014-10-06 11:14:08 +13:00
|
|
|
|
retval = ovs_mutex_trylock(&ukey->mutex);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
} else {
|
|
|
|
|
/* Usually we try to avoid installing flows from revalidator threads,
|
|
|
|
|
* because locking on a umap may cause handler threads to block.
|
|
|
|
|
* However there are certain cases, like when ovs-vswitchd is
|
|
|
|
|
* restarted, where it is desirable to handle flows that exist in the
|
|
|
|
|
* datapath gracefully (ie, don't just clear the datapath). */
|
2014-10-06 11:14:08 +13:00
|
|
|
|
bool install;
|
|
|
|
|
|
|
|
|
|
retval = ukey_create_from_dpif_flow(udpif, flow, &ukey);
|
|
|
|
|
if (retval) {
|
|
|
|
|
goto done;
|
|
|
|
|
}
|
2016-08-31 11:06:04 -07:00
|
|
|
|
install = ukey_install__(udpif, ukey);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
if (install) {
|
|
|
|
|
retval = 0;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
} else {
|
|
|
|
|
ukey_delete__(ukey);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
retval = EBUSY;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
}
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
done:
|
|
|
|
|
*error = retval;
|
|
|
|
|
if (retval) {
|
2014-06-04 09:59:23 +00:00
|
|
|
|
*result = NULL;
|
2014-10-06 11:14:08 +13:00
|
|
|
|
} else {
|
|
|
|
|
*result = ukey;
|
2014-06-04 09:59:23 +00:00
|
|
|
|
}
|
2014-10-06 11:14:08 +13:00
|
|
|
|
return retval;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void
|
2014-05-05 15:44:40 +12:00
|
|
|
|
ukey_delete__(struct udpif_key *ukey)
|
2014-04-10 07:14:08 +00:00
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
2014-07-25 13:54:24 +12:00
|
|
|
|
if (ukey) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
if (ukey->key_recirc_id) {
|
|
|
|
|
recirc_free_id(ukey->key_recirc_id);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2015-11-25 15:19:37 -08:00
|
|
|
|
recirc_refs_unref(&ukey->recircs);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
xlate_cache_delete(ukey->xcache);
|
2015-08-12 14:50:54 -07:00
|
|
|
|
ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions));
|
2014-07-25 13:54:24 +12:00
|
|
|
|
ovs_mutex_destroy(&ukey->mutex);
|
|
|
|
|
free(ukey);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-05 15:44:40 +12:00
|
|
|
|
static void
|
2014-06-05 17:28:46 +12:00
|
|
|
|
ukey_delete(struct umap *umap, struct udpif_key *ukey)
|
|
|
|
|
OVS_REQUIRES(umap->mutex)
|
2014-05-05 15:44:40 +12:00
|
|
|
|
{
|
2016-08-31 11:06:04 -07:00
|
|
|
|
ovs_mutex_lock(&ukey->mutex);
|
2017-01-10 15:54:02 -08:00
|
|
|
|
if (ukey->state < UKEY_DELETED) {
|
|
|
|
|
cmap_remove(&umap->cmap, &ukey->cmap_node, ukey->hash);
|
|
|
|
|
ovsrcu_postpone(ukey_delete__, ukey);
|
|
|
|
|
transition_ukey(ukey, UKEY_DELETED);
|
|
|
|
|
}
|
2016-08-31 11:06:04 -07:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
2014-05-05 15:44:40 +12:00
|
|
|
|
}
|
|
|
|
|
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
static bool
|
2023-03-08 13:55:44 +01:00
|
|
|
|
should_revalidate(const struct udpif *udpif, const struct udpif_key *ukey,
|
|
|
|
|
uint64_t packets)
|
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
{
|
|
|
|
|
long long int metric, now, duration;
|
2023-03-08 13:55:44 +01:00
|
|
|
|
long long int used = ukey->stats.used;
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
|
2023-01-16 19:01:29 -08:00
|
|
|
|
if (!ofproto_min_revalidate_pps) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-20 13:13:04 -07:00
|
|
|
|
if (!used) {
|
|
|
|
|
/* Always revalidate the first time a flow is dumped. */
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-21 11:34:22 +03:00
|
|
|
|
if (udpif->dump_duration < ofproto_max_revalidator / 2) {
|
2014-07-02 07:41:33 +00:00
|
|
|
|
/* We are likely to handle full revalidation for the flows. */
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
/* Calculate the mean time between seeing these packets. If this
|
|
|
|
|
* exceeds the threshold, then delete the flow rather than performing
|
|
|
|
|
* costly revalidation for flows that aren't being hit frequently.
|
|
|
|
|
*
|
|
|
|
|
* This is targeted at situations where the dump_duration is high (~1s),
|
|
|
|
|
* and revalidation is triggered by a call to udpif_revalidate(). In
|
|
|
|
|
* these situations, revalidation of all flows causes fluctuations in the
|
|
|
|
|
* flow_limit due to the interaction with the dump_duration and max_idle.
|
|
|
|
|
* This tends to result in deletion of low-throughput flows anyway, so
|
|
|
|
|
* skip the revalidation and just delete those flows. */
|
|
|
|
|
packets = MAX(packets, 1);
|
|
|
|
|
now = MAX(used, time_msec());
|
|
|
|
|
duration = now - used;
|
|
|
|
|
metric = duration / packets;
|
|
|
|
|
|
2023-03-08 13:55:44 +01:00
|
|
|
|
if (metric < 1000 / ofproto_min_revalidate_pps ||
|
|
|
|
|
(ukey->offloaded && duration < ofproto_offloaded_stats_delay)) {
|
|
|
|
|
/* The flow is receiving more than min-revalidate-pps, so keep it.
|
|
|
|
|
* Or it's a hardware offloaded flow that might take up to X seconds
|
|
|
|
|
* to update its statistics. Until we are sure the statistics had a
|
|
|
|
|
* chance to be updated, also keep it. */
|
2014-07-02 07:41:33 +00:00
|
|
|
|
return true;
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
}
|
2014-07-02 07:41:33 +00:00
|
|
|
|
return false;
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
}
|
|
|
|
|
|
2016-09-20 12:33:51 -07:00
|
|
|
|
struct reval_context {
|
|
|
|
|
/* Optional output parameters */
|
|
|
|
|
struct flow_wildcards *wc;
|
|
|
|
|
struct ofpbuf *odp_actions;
|
|
|
|
|
struct netflow **netflow;
|
|
|
|
|
struct xlate_cache *xcache;
|
|
|
|
|
|
|
|
|
|
/* Required output parameters */
|
|
|
|
|
struct xlate_out xout;
|
|
|
|
|
struct flow flow;
|
|
|
|
|
};
|
|
|
|
|
|
2016-09-20 16:41:31 -07:00
|
|
|
|
/* Translates 'key' into a flow, populating 'ctx' as it goes along.
|
2016-09-20 12:33:51 -07:00
|
|
|
|
*
|
|
|
|
|
* Returns 0 on success, otherwise a positive errno value.
|
|
|
|
|
*
|
|
|
|
|
* The caller is responsible for uninitializing ctx->xout on success.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-09-20 16:41:31 -07:00
|
|
|
|
xlate_key(struct udpif *udpif, const struct nlattr *key, unsigned int len,
|
|
|
|
|
const struct dpif_flow_stats *push, struct reval_context *ctx)
|
2016-09-20 12:33:51 -07:00
|
|
|
|
{
|
|
|
|
|
struct ofproto_dpif *ofproto;
|
|
|
|
|
ofp_port_t ofp_in_port;
|
ofproto-dpif-upcall: Slow path flows that datapath can't fully match.
In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:
- The flow lacks one or more fields that userspace expects to be there,
that is, the datapath doesn't understand or parse them but userspace
does. This is, for example, what would happen if current OVS
userspace, which understands and extracts TCP flags, were to be
paired with an older OVS kernel module, which does not. Internally
OVS uses the name ODP_FIT_TOO_LITTLE for this situation.
- The flow includes fields that userspace does not know about, that is,
the datapath understands and parses them but userspace does not.
This is, for example, what would happen if an old OVS userspace that
does not understand or extract TCP flags, were to be paired with a
recent OVS kernel module that does. Internally, OVS uses the name
ODP_FIT_TOO_MUCH for this situation.
The latter is not a big deal and OVS doesn't have to do much to cope with
it.
The former is more of a problem. When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path. Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.
Somehow, this nuance was lost a long time. From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE. Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it. I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix. This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.
CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>
2018-01-24 11:40:19 -08:00
|
|
|
|
enum odp_key_fitness fitness;
|
2016-09-20 12:33:51 -07:00
|
|
|
|
struct xlate_in xin;
|
|
|
|
|
int error;
|
|
|
|
|
|
2018-12-14 18:16:55 -08:00
|
|
|
|
fitness = odp_flow_key_to_flow(key, len, &ctx->flow, NULL);
|
ofproto-dpif-upcall: Slow path flows that datapath can't fully match.
In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:
- The flow lacks one or more fields that userspace expects to be there,
that is, the datapath doesn't understand or parse them but userspace
does. This is, for example, what would happen if current OVS
userspace, which understands and extracts TCP flags, were to be
paired with an older OVS kernel module, which does not. Internally
OVS uses the name ODP_FIT_TOO_LITTLE for this situation.
- The flow includes fields that userspace does not know about, that is,
the datapath understands and parses them but userspace does not.
This is, for example, what would happen if an old OVS userspace that
does not understand or extract TCP flags, were to be paired with a
recent OVS kernel module that does. Internally, OVS uses the name
ODP_FIT_TOO_MUCH for this situation.
The latter is not a big deal and OVS doesn't have to do much to cope with
it.
The former is more of a problem. When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path. Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.
Somehow, this nuance was lost a long time. From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE. Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it. I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix. This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.
CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>
2018-01-24 11:40:19 -08:00
|
|
|
|
if (fitness == ODP_FIT_ERROR) {
|
2016-09-20 12:33:51 -07:00
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
error = xlate_lookup(udpif->backer, &ctx->flow, &ofproto, NULL, NULL,
|
2022-09-01 17:42:49 +02:00
|
|
|
|
ctx->netflow, &ofp_in_port, NULL);
|
2016-09-20 12:33:51 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xlate_in_init(&xin, ofproto, ofproto_dpif_get_tables_version(ofproto),
|
|
|
|
|
&ctx->flow, ofp_in_port, NULL, push->tcp_flags,
|
|
|
|
|
NULL, ctx->wc, ctx->odp_actions);
|
|
|
|
|
if (push->n_packets) {
|
|
|
|
|
xin.resubmit_stats = push;
|
|
|
|
|
xin.allow_side_effects = true;
|
|
|
|
|
}
|
|
|
|
|
xin.xcache = ctx->xcache;
|
|
|
|
|
xlate_actions(&xin, &ctx->xout);
|
ofproto-dpif-upcall: Slow path flows that datapath can't fully match.
In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:
- The flow lacks one or more fields that userspace expects to be there,
that is, the datapath doesn't understand or parse them but userspace
does. This is, for example, what would happen if current OVS
userspace, which understands and extracts TCP flags, were to be
paired with an older OVS kernel module, which does not. Internally
OVS uses the name ODP_FIT_TOO_LITTLE for this situation.
- The flow includes fields that userspace does not know about, that is,
the datapath understands and parses them but userspace does not.
This is, for example, what would happen if an old OVS userspace that
does not understand or extract TCP flags, were to be paired with a
recent OVS kernel module that does. Internally, OVS uses the name
ODP_FIT_TOO_MUCH for this situation.
The latter is not a big deal and OVS doesn't have to do much to cope with
it.
The former is more of a problem. When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path. Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.
Somehow, this nuance was lost a long time. From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE. Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it. I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix. This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.
CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>
2018-01-24 11:40:19 -08:00
|
|
|
|
if (fitness == ODP_FIT_TOO_LITTLE) {
|
|
|
|
|
ctx->xout.slow |= SLOW_MATCH;
|
|
|
|
|
}
|
2016-09-20 12:33:51 -07:00
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-20 16:41:31 -07:00
|
|
|
|
static int
|
|
|
|
|
xlate_ukey(struct udpif *udpif, const struct udpif_key *ukey,
|
2016-09-20 14:08:21 -07:00
|
|
|
|
uint16_t tcp_flags, struct reval_context *ctx)
|
2016-09-20 16:41:31 -07:00
|
|
|
|
{
|
2016-09-20 14:08:21 -07:00
|
|
|
|
struct dpif_flow_stats push = {
|
|
|
|
|
.tcp_flags = tcp_flags,
|
|
|
|
|
};
|
|
|
|
|
return xlate_key(udpif, ukey->key, ukey->key_len, &push, ctx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
populate_xcache(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
uint16_t tcp_flags)
|
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
|
|
|
|
{
|
|
|
|
|
struct reval_context ctx = {
|
|
|
|
|
.odp_actions = NULL,
|
|
|
|
|
.netflow = NULL,
|
|
|
|
|
.wc = NULL,
|
|
|
|
|
};
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
ovs_assert(!ukey->xcache);
|
|
|
|
|
ukey->xcache = ctx.xcache = xlate_cache_new();
|
|
|
|
|
error = xlate_ukey(udpif, ukey, tcp_flags, &ctx);
|
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
xlate_out_uninit(&ctx.xout);
|
|
|
|
|
|
|
|
|
|
return 0;
|
2016-09-20 16:41:31 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-08-03 18:43:53 -07:00
|
|
|
|
/* Re-translates the flow in 'ukey' and decides whether the installed
 * datapath flow is still valid.
 *
 * Returns:
 *   UKEY_DELETE  - translation failed, the translation says caching must be
 *                  avoided, no ofproto could be found for a slow-pathed flow,
 *                  the installed datapath mask could not be parsed, or the
 *                  installed mask is now too generic; '*del_reason' records
 *                  which case was hit.
 *   UKEY_MODIFY  - the mask is still fine but the newly translated actions
 *                  differ from the installed ones; recirculation references
 *                  are transferred into 'recircs' for the caller.
 *   UKEY_KEEP    - the installed flow still matches the translation. */
static enum reval_result
revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
                  uint16_t tcp_flags, struct ofpbuf *odp_actions,
                  struct recirc_refs *recircs, struct xlate_cache *xcache,
                  enum flow_del_reason *del_reason)
{
    struct xlate_out *xoutp;
    struct netflow *netflow;
    struct flow_wildcards dp_mask, wc;
    enum reval_result result;
    struct reval_context ctx = {
        .odp_actions = odp_actions,
        .netflow = &netflow,
        .xcache = xcache,
        .wc = &wc,
    };

    OVS_USDT_PROBE(revalidate_ukey__, entry, udpif, ukey, tcp_flags,
                   odp_actions, recircs, xcache);

    /* Default to deletion; the code below upgrades the result only once it
     * has proven the installed flow is (mostly) still correct. */
    result = UKEY_DELETE;
    xoutp = NULL;
    netflow = NULL;

    if (xlate_ukey(udpif, ukey, tcp_flags, &ctx)) {
        *del_reason = FDR_XLATION_ERROR;
        goto exit;
    }
    xoutp = &ctx.xout;

    if (xoutp->avoid_caching) {
        *del_reason = FDR_AVOID_CACHING;
        goto exit;
    }

    if (xoutp->slow) {
        struct ofproto_dpif *ofproto;
        ofp_port_t ofp_in_port;

        ofproto = xlate_lookup_ofproto(udpif->backer, &ctx.flow, &ofp_in_port,
                                       NULL);

        /* Slow-pathed flows get userspace actions instead of the translated
         * ones, so discard whatever translation produced. */
        ofpbuf_clear(odp_actions);

        if (!ofproto) {
            *del_reason = FDR_NO_OFPROTO;
            goto exit;
        }

        compose_slow_path(udpif, xoutp, ctx.flow.in_port.odp_port,
                          ofp_in_port, odp_actions,
                          ofproto->up.slowpath_meter_id, &ofproto->uuid);
    }

    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, &dp_mask, &ctx.flow,
                             NULL)
        == ODP_FIT_ERROR) {
        *del_reason = FDR_BAD_ODP_FIT;
        goto exit;
    }

    /* Do not modify if any bit is wildcarded by the installed datapath flow,
     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
     * tells that the datapath flow is now too generic and must be narrowed
     * down.  Note that we do not know if the datapath has ignored any of the
     * wildcarded bits, so we may be overly conservative here. */
    if (flow_wildcards_has_extra(&dp_mask, ctx.wc)) {
        *del_reason = FDR_FLOW_WILDCARDED;
        goto exit;
    }

    if (!ofpbuf_equal(odp_actions,
                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
        /* The datapath mask was OK, but the actions seem to have changed.
         * Let's modify it in place. */
        result = UKEY_MODIFY;
        /* Transfer recirc action ID references to the caller. */
        recirc_refs_swap(recircs, &xoutp->recircs);
        goto exit;
    }

    result = UKEY_KEEP;

exit:
    if (netflow && result == UKEY_DELETE) {
        netflow_flow_clear(netflow, &ctx.flow);
    }
    /* 'xoutp' is NULL when translation failed; xlate_out_uninit() on NULL is
     * reached only on that path. */
    xlate_out_uninit(xoutp);

    OVS_USDT_PROBE(revalidate_ukey__, exit, udpif, ukey, result);

    return result;
}
|
|
|
|
|
|
2023-05-26 14:03:38 +02:00
|
|
|
|
static void
|
|
|
|
|
log_unexpected_stats_jump(struct udpif_key *ukey,
|
|
|
|
|
const struct dpif_flow_stats *stats)
|
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
|
|
|
|
{
|
|
|
|
|
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct ofpbuf *actions;
|
|
|
|
|
|
|
|
|
|
odp_format_ufid(&ukey->ufid, &ds);
|
|
|
|
|
ds_put_cstr(&ds, ", ");
|
|
|
|
|
odp_flow_key_format(ukey->key, ukey->key_len, &ds);
|
|
|
|
|
ds_put_cstr(&ds, ", actions:");
|
|
|
|
|
actions = ovsrcu_get(struct ofpbuf *, &ukey->actions);
|
|
|
|
|
format_odp_actions(&ds, actions->data, actions->size, NULL);
|
|
|
|
|
VLOG_WARN_RL(&rll, "Unexpected jump in packet stats from %"PRIu64
|
|
|
|
|
" to %"PRIu64" when handling ukey %s",
|
|
|
|
|
ukey->stats.n_packets, stats->n_packets, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-20 13:13:04 -07:00
|
|
|
|
/* Verifies that the datapath actions of 'ukey' are still correct, and pushes
 * 'stats' for it.
 *
 * Returns a recommended action for 'ukey', options include:
 *      UKEY_DELETE The ukey should be deleted.
 *      UKEY_KEEP   The ukey is fine as is.
 *      UKEY_MODIFY The ukey's actions should be changed but is otherwise
 *                  fine.  Callers should change the actions to those found
 *                  in the caller supplied 'odp_actions' buffer.  The
 *                  recirculation references can be found in 'recircs' and
 *                  must be handled by the caller.
 *
 * If the result is UKEY_MODIFY, then references to all recirc_ids used by the
 * new flow will be held within 'recircs' (which may be none).
 *
 * The caller is responsible for both initializing 'recircs' prior this call,
 * and ensuring any references are eventually freed.
 */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct dpif_flow_stats *stats,
                struct ofpbuf *odp_actions, uint64_t reval_seq,
                struct recirc_refs *recircs, enum flow_del_reason *del_reason)
    OVS_REQUIRES(ukey->mutex)
{
    /* A ukey whose reval_seq already matches has been validated during this
     * revalidation round and needs no re-translation. */
    bool need_revalidate = ukey->reval_seq != reval_seq;
    enum reval_result result = UKEY_DELETE;
    struct dpif_flow_stats push;

    ofpbuf_clear(odp_actions);

    /* 'push' holds the delta since the stats last recorded in the ukey;
     * 'used' and 'tcp_flags' are taken as-is from the datapath. */
    push.used = stats->used;
    push.tcp_flags = stats->tcp_flags;
    push.n_packets = stats->n_packets - ukey->stats.n_packets;
    push.n_bytes = stats->n_bytes - ukey->stats.n_bytes;

    if (stats->n_packets < ukey->stats.n_packets &&
        ukey->stats.n_packets < UINT64_THREE_QUARTERS) {
        /* Report cases where the packet counter is lower than the previous
         * instance, but exclude the potential wrapping of an uint64_t. */
        COVERAGE_INC(ukey_invalid_stat_reset);
        log_unexpected_stats_jump(ukey, stats);
    }

    if (need_revalidate) {
        if (should_revalidate(udpif, ukey, push.n_packets)) {
            /* Reuse the ukey's xcache if present, otherwise allocate one;
             * revalidate_ukey__() repopulates it during translation. */
            if (!ukey->xcache) {
                ukey->xcache = xlate_cache_new();
            } else {
                xlate_cache_clear(ukey->xcache);
            }
            result = revalidate_ukey__(udpif, ukey, push.tcp_flags,
                                       odp_actions, recircs, ukey->xcache,
                                       del_reason);
        } else {
            /* Delete, since it is too expensive to revalidate. */
            *del_reason = FDR_TOO_EXPENSIVE;
        }
    } else if (!push.n_packets || ukey->xcache
               || !populate_xcache(udpif, ukey, push.tcp_flags)) {
        /* No revalidation needed.  The ukey is kept if there are no stats to
         * push, or if an xcache exists (or could be populated) to push them
         * through. */
        result = UKEY_KEEP;
    }

    /* Stats for deleted flows will be attributed upon flow deletion. Skip. */
    if (result != UKEY_DELETE) {
        xlate_push_stats(ukey->xcache, &push, ukey->offloaded);
        ukey->stats = *stats;
        ukey->reval_seq = reval_seq;
    }

    return result;
}
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
static void
|
2014-12-16 17:44:40 -08:00
|
|
|
|
delete_op_init__(struct udpif *udpif, struct ukey_op *op,
|
|
|
|
|
const struct dpif_flow *flow)
|
2014-10-06 11:14:08 +13:00
|
|
|
|
{
|
2014-12-08 17:14:39 -08:00
|
|
|
|
op->ukey = NULL;
|
2014-10-06 11:14:08 +13:00
|
|
|
|
op->dop.type = DPIF_OP_FLOW_DEL;
|
2018-05-24 10:32:59 -07:00
|
|
|
|
op->dop.flow_del.key = flow->key;
|
|
|
|
|
op->dop.flow_del.key_len = flow->key_len;
|
|
|
|
|
op->dop.flow_del.ufid = flow->ufid_present ? &flow->ufid : NULL;
|
|
|
|
|
op->dop.flow_del.pmd_id = flow->pmd_id;
|
|
|
|
|
op->dop.flow_del.stats = &op->stats;
|
|
|
|
|
op->dop.flow_del.terse = udpif_use_ufid(udpif);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void
|
2014-12-16 17:44:40 -08:00
|
|
|
|
delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey)
|
2014-02-11 13:55:34 -08:00
|
|
|
|
{
|
|
|
|
|
op->ukey = ukey;
|
2014-08-21 00:21:03 +12:00
|
|
|
|
op->dop.type = DPIF_OP_FLOW_DEL;
|
2018-05-24 10:32:59 -07:00
|
|
|
|
op->dop.flow_del.key = ukey->key;
|
|
|
|
|
op->dop.flow_del.key_len = ukey->key_len;
|
|
|
|
|
op->dop.flow_del.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
|
|
|
|
|
op->dop.flow_del.pmd_id = ukey->pmd_id;
|
|
|
|
|
op->dop.flow_del.stats = &op->stats;
|
|
|
|
|
op->dop.flow_del.terse = udpif_use_ufid(udpif);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
|
|
|
|
|
2015-08-03 18:43:53 -07:00
|
|
|
|
static void
|
2016-08-31 11:06:02 -07:00
|
|
|
|
put_op_init(struct ukey_op *op, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_flow_put_flags flags)
|
2015-08-03 18:43:53 -07:00
|
|
|
|
{
|
|
|
|
|
op->ukey = ukey;
|
|
|
|
|
op->dop.type = DPIF_OP_FLOW_PUT;
|
2018-05-24 10:32:59 -07:00
|
|
|
|
op->dop.flow_put.flags = flags;
|
|
|
|
|
op->dop.flow_put.key = ukey->key;
|
|
|
|
|
op->dop.flow_put.key_len = ukey->key_len;
|
|
|
|
|
op->dop.flow_put.mask = ukey->mask;
|
|
|
|
|
op->dop.flow_put.mask_len = ukey->mask_len;
|
|
|
|
|
op->dop.flow_put.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
|
|
|
|
|
op->dop.flow_put.pmd_id = ukey->pmd_id;
|
|
|
|
|
op->dop.flow_put.stats = NULL;
|
|
|
|
|
ukey_get_actions(ukey, &op->dop.flow_put.actions,
|
|
|
|
|
&op->dop.flow_put.actions_len);
|
2015-08-03 18:43:53 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-01-07 16:16:25 -08:00
|
|
|
|
/* Executes datapath operations 'ops' and attributes stats retrieved from the
|
|
|
|
|
* datapath as part of those operations. */
|
2014-02-11 13:55:34 -08:00
|
|
|
|
static void
|
2016-01-07 16:16:25 -08:00
|
|
|
|
push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
2014-02-11 13:55:34 -08:00
|
|
|
|
struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
|
|
|
|
|
size_t i;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
ovs_assert(n_ops <= REVALIDATE_MAX_BATCH);
|
|
|
|
|
for (i = 0; i < n_ops; i++) {
|
2014-08-21 00:21:03 +12:00
|
|
|
|
opsp[i] = &ops[i].dop;
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
dpif_operate(udpif->dpif, opsp, n_ops, DPIF_OFFLOAD_AUTO);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
|
|
|
|
for (i = 0; i < n_ops; i++) {
|
2014-08-21 00:21:03 +12:00
|
|
|
|
struct ukey_op *op = &ops[i];
|
2015-08-03 18:43:53 -07:00
|
|
|
|
|
2015-08-28 05:05:07 +00:00
|
|
|
|
if (op->dop.error) {
|
2017-09-06 15:12:52 -07:00
|
|
|
|
if (op->ukey) {
|
|
|
|
|
ovs_mutex_lock(&op->ukey->mutex);
|
2023-07-01 05:11:16 +00:00
|
|
|
|
if (op->dop.type == DPIF_OP_FLOW_DEL) {
|
|
|
|
|
transition_ukey(op->ukey, UKEY_EVICTED);
|
|
|
|
|
} else {
|
|
|
|
|
/* Modification of the flow failed. */
|
|
|
|
|
transition_ukey(op->ukey, UKEY_INCONSISTENT);
|
|
|
|
|
}
|
2017-09-06 15:12:52 -07:00
|
|
|
|
ovs_mutex_unlock(&op->ukey->mutex);
|
|
|
|
|
}
|
2015-08-28 05:05:07 +00:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-01 05:11:16 +00:00
|
|
|
|
if (op->dop.type != DPIF_OP_FLOW_DEL) {
|
|
|
|
|
/* Only deleted flows need their stats pushed. */
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct dpif_flow_stats *push, *stats, push_buf;
|
|
|
|
|
|
|
|
|
|
stats = op->dop.flow_del.stats;
|
|
|
|
|
push = &push_buf;
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
if (op->ukey) {
|
|
|
|
|
ovs_mutex_lock(&op->ukey->mutex);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
transition_ukey(op->ukey, UKEY_EVICTED);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
push->used = MAX(stats->used, op->ukey->stats.used);
|
|
|
|
|
push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
|
|
|
|
|
push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
|
|
|
|
|
push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;
|
2023-02-27 16:29:26 +01:00
|
|
|
|
|
|
|
|
|
if (stats->n_packets < op->ukey->stats.n_packets &&
|
|
|
|
|
op->ukey->stats.n_packets < UINT64_THREE_QUARTERS) {
|
|
|
|
|
/* Report cases where the packet counter is lower than the
|
|
|
|
|
* previous instance, but exclude the potential wrapping of an
|
|
|
|
|
* uint64_t. */
|
|
|
|
|
COVERAGE_INC(ukey_invalid_stat_reset);
|
|
|
|
|
}
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
ovs_mutex_unlock(&op->ukey->mutex);
|
|
|
|
|
} else {
|
|
|
|
|
push = stats;
|
|
|
|
|
}
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
|
|
|
|
if (push->n_packets || netflow_exists()) {
|
2018-05-24 10:32:59 -07:00
|
|
|
|
const struct nlattr *key = op->dop.flow_del.key;
|
|
|
|
|
size_t key_len = op->dop.flow_del.key_len;
|
2014-02-11 13:55:34 -08:00
|
|
|
|
struct netflow *netflow;
|
2016-09-20 16:41:31 -07:00
|
|
|
|
struct reval_context ctx = {
|
|
|
|
|
.netflow = &netflow,
|
|
|
|
|
};
|
2014-07-01 09:54:18 +00:00
|
|
|
|
int error;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
if (op->ukey) {
|
|
|
|
|
ovs_mutex_lock(&op->ukey->mutex);
|
|
|
|
|
if (op->ukey->xcache) {
|
Add offload packets statistics
Add argument '--offload-stats' for command ovs-appctl bridge/dump-flows
to display the offloaded packets statistics.
The commands display as below:
orignal command:
ovs-appctl bridge/dump-flows br0
duration=574s, n_packets=1152, n_bytes=110768, priority=0,actions=NORMAL
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x3,actions=drop
new command with argument '--offload-stats'
Notice: 'n_offload_packets' are a subset of n_packets and 'n_offload_bytes' are
a subset of n_bytes.
ovs-appctl bridge/dump-flows --offload-stats br0
duration=582s, n_packets=1152, n_bytes=110768, n_offload_packets=1107, n_offload_bytes=107992, priority=0,actions=NORMAL
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x3,actions=drop
Signed-off-by: zhaozhanxu <zhaozhanxu@163.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2019-12-05 14:26:25 +08:00
|
|
|
|
xlate_push_stats(op->ukey->xcache, push, false);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
ovs_mutex_unlock(&op->ukey->mutex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_unlock(&op->ukey->mutex);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
key = op->ukey->key;
|
|
|
|
|
key_len = op->ukey->key_len;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
}
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
2016-09-20 16:41:31 -07:00
|
|
|
|
error = xlate_key(udpif, key, key_len, push, &ctx);
|
|
|
|
|
if (error) {
|
2018-02-27 10:44:13 -08:00
|
|
|
|
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
VLOG_WARN_RL(&rll, "xlate_key failed (%s)!",
|
2017-05-01 12:58:07 -07:00
|
|
|
|
ovs_strerror(error));
|
2016-09-20 16:41:31 -07:00
|
|
|
|
} else {
|
|
|
|
|
xlate_out_uninit(&ctx.xout);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
if (netflow) {
|
2016-09-20 16:41:31 -07:00
|
|
|
|
netflow_flow_clear(netflow, &ctx.flow);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
2016-01-07 16:16:25 -08:00
|
|
|
|
/* Executes datapath operations 'ops', attributes stats retrieved from the
|
|
|
|
|
* datapath, and deletes ukeys corresponding to deleted flows. */
|
2014-04-10 07:14:08 +00:00
|
|
|
|
static void
|
2014-08-21 00:21:03 +12:00
|
|
|
|
push_ukey_ops(struct udpif *udpif, struct umap *umap,
|
|
|
|
|
struct ukey_op *ops, size_t n_ops)
|
2014-04-10 07:14:08 +00:00
|
|
|
|
{
|
|
|
|
|
int i;
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
2016-01-07 16:16:25 -08:00
|
|
|
|
push_dp_ops(udpif, ops, n_ops);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
ovs_mutex_lock(&umap->mutex);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
for (i = 0; i < n_ops; i++) {
|
2016-01-07 11:47:46 -08:00
|
|
|
|
if (ops[i].dop.type == DPIF_OP_FLOW_DEL) {
|
|
|
|
|
ukey_delete(umap, ops[i].ukey);
|
|
|
|
|
}
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
2014-06-05 17:28:46 +12:00
|
|
|
|
ovs_mutex_unlock(&umap->mutex);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
static void
|
|
|
|
|
log_unexpected_flow(const struct dpif_flow *flow, int error)
|
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
|
|
|
|
|
ds_put_format(&ds, "Failed to acquire udpif_key corresponding to "
|
|
|
|
|
"unexpected flow (%s): ", ovs_strerror(error));
|
|
|
|
|
odp_format_ufid(&flow->ufid, &ds);
|
2018-02-27 10:44:13 -08:00
|
|
|
|
|
|
|
|
|
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(10, 60);
|
|
|
|
|
VLOG_WARN_RL(&rll, "%s", ds_cstr(&ds));
|
|
|
|
|
|
2016-06-08 13:04:11 -03:00
|
|
|
|
ds_destroy(&ds);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-25 15:19:37 -08:00
|
|
|
|
static void
|
|
|
|
|
reval_op_init(struct ukey_op *op, enum reval_result result,
|
|
|
|
|
struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
struct recirc_refs *recircs, struct ofpbuf *odp_actions)
|
2016-08-31 11:06:04 -07:00
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
2015-11-25 15:19:37 -08:00
|
|
|
|
{
|
|
|
|
|
if (result == UKEY_DELETE) {
|
|
|
|
|
delete_op_init(udpif, op, ukey);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
transition_ukey(ukey, UKEY_EVICTING);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
} else if (result == UKEY_MODIFY) {
|
|
|
|
|
/* Store the new recircs. */
|
|
|
|
|
recirc_refs_swap(&ukey->recircs, recircs);
|
|
|
|
|
/* Release old recircs. */
|
|
|
|
|
recirc_refs_unref(recircs);
|
|
|
|
|
/* ukey->key_recirc_id remains, as the key is the same as before. */
|
|
|
|
|
|
|
|
|
|
ukey_set_actions(ukey, odp_actions);
|
2016-08-31 11:06:02 -07:00
|
|
|
|
put_op_init(op, ukey, DPIF_FP_MODIFY);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
static void
|
|
|
|
|
ukey_netdev_unref(struct udpif_key *ukey)
|
|
|
|
|
{
|
|
|
|
|
if (!ukey->in_netdev) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
netdev_close(ukey->in_netdev);
|
|
|
|
|
ukey->in_netdev = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Given a udpif_key, get its input port (netdev) by parsing the flow keys
 * and actions. The flow may not contain flow attributes if it is a terse
 * dump; read its attributes from the ukey and then parse the flow to get
 * the port info. Save them in udpif_key.
 */
static void
ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey)
{
    const char *dpif_type_str = dpif_normalize_type(dpif_type(udpif->dpif));
    const struct nlattr *k;
    unsigned int left;

    /* Remove existing references to netdev */
    ukey_netdev_unref(ukey);

    /* Find the input port and get a reference to its netdev */
    NL_ATTR_FOR_EACH (k, left, ukey->key, ukey->key_len) {
        enum ovs_key_attr type = nl_attr_type(k);

        if (type == OVS_KEY_ATTR_IN_PORT) {
            /* Resolve the datapath port number to its netdev.  Iteration
             * continues: a later tunnel attribute takes precedence. */
            ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(k),
                                               dpif_type_str);
        } else if (type == OVS_KEY_ATTR_TUNNEL) {
            struct flow_tnl tnl;
            enum odp_key_fitness res;

            /* A tunnel key supersedes any in_port netdev found earlier in
             * the attribute list; drop that reference first. */
            if (ukey->in_netdev) {
                netdev_close(ukey->in_netdev);
                ukey->in_netdev = NULL;
            }
            res = odp_tun_key_from_attr(k, &tnl, NULL);
            if (res != ODP_FIT_ERROR) {
                /* Tunnel key parsed; look up the tunnel netdev and stop. */
                ukey->in_netdev = flow_get_tunnel_netdev(&tnl);
                break;
            }
        }
    }
}
|
|
|
|
|
|
2018-10-18 21:43:13 +05:30
|
|
|
|
static uint64_t
|
|
|
|
|
udpif_flow_packet_delta(struct udpif_key *ukey, const struct dpif_flow *f)
|
|
|
|
|
{
|
|
|
|
|
return f->stats.n_packets + ukey->flow_backlog_packets -
|
|
|
|
|
ukey->flow_packets;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static long long int
|
|
|
|
|
udpif_flow_time_delta(struct udpif *udpif, struct udpif_key *ukey)
|
|
|
|
|
{
|
|
|
|
|
return (udpif->dpif->current_ms - ukey->flow_time) / 1000;
|
|
|
|
|
}
|
|
|
|
|
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
/*
 * Save backlog packet count while switching modes
 * between offloaded and kernel datapaths.
 */
static void
udpif_set_ukey_backlog_packets(struct udpif_key *ukey)
{
    /* Carry the running packet total forward so the next pps computation
     * does not see a discontinuity when the flow changes datapath. */
    ukey->flow_backlog_packets = ukey->flow_packets;
}
|
|
|
|
|
|
2018-10-18 21:43:13 +05:30
|
|
|
|
/* Gather pps-rate for the given dpif_flow and save it in its ukey */
static void
udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey,
                      const struct dpif_flow *f)
{
    uint64_t pps;

    /* Update pps-rate only when we are close to rebalance interval */
    if (udpif->dpif->current_ms - ukey->flow_time < OFFL_REBAL_INTVL_MSEC) {
        return;
    }

    ukey->offloaded = f->attrs.offloaded;
    /* NOTE(review): the divisor is the elapsed time in whole seconds;
     * this presumes OFFL_REBAL_INTVL_MSEC >= 1000 so it cannot be zero
     * after the early return above — confirm against its definition. */
    pps = udpif_flow_packet_delta(ukey, f) /
          udpif_flow_time_delta(udpif, ukey);
    ukey->flow_pps_rate = pps;
    /* Re-baseline the packet counter and timestamp for the next sample. */
    ukey->flow_packets = ukey->flow_backlog_packets + f->stats.n_packets;
    ukey->flow_time = udpif->dpif->current_ms;
}
|
|
|
|
|
|
2020-06-04 13:47:00 +03:00
|
|
|
|
static long long int
|
|
|
|
|
udpif_update_used(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
struct dpif_flow_stats *stats)
|
|
|
|
|
OVS_REQUIRES(ukey->mutex)
|
|
|
|
|
{
|
|
|
|
|
if (!udpif->dump->terse) {
|
|
|
|
|
return ukey->created;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (stats->n_packets > ukey->stats.n_packets) {
|
|
|
|
|
stats->used = udpif->dpif->current_ms;
|
|
|
|
|
} else if (ukey->stats.used) {
|
|
|
|
|
stats->used = ukey->stats.used;
|
|
|
|
|
} else {
|
|
|
|
|
stats->used = ukey->created;
|
|
|
|
|
}
|
|
|
|
|
return stats->used;
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
static void
|
2014-04-10 07:14:08 +00:00
|
|
|
|
revalidate(struct revalidator *revalidator)
|
2014-02-11 13:55:34 -08:00
|
|
|
|
{
|
2015-08-03 18:43:53 -07:00
|
|
|
|
uint64_t odp_actions_stub[1024 / 8];
|
|
|
|
|
struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
|
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
struct udpif *udpif = revalidator->udpif;
|
2014-05-20 11:37:02 -07:00
|
|
|
|
struct dpif_flow_dump_thread *dump_thread;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
uint64_t dump_seq, reval_seq;
|
2020-09-30 16:23:59 -03:00
|
|
|
|
bool kill_warn_print = true;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unsigned int flow_limit;
|
|
|
|
|
|
2014-05-14 16:17:25 +12:00
|
|
|
|
dump_seq = seq_read(udpif->dump_seq);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
reval_seq = seq_read(udpif->reval_seq);
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
|
2014-05-20 11:37:02 -07:00
|
|
|
|
dump_thread = dpif_flow_dump_thread_create(udpif->dump);
|
|
|
|
|
for (;;) {
|
2014-08-21 00:21:03 +12:00
|
|
|
|
struct ukey_op ops[REVALIDATE_MAX_BATCH];
|
2014-05-20 11:37:02 -07:00
|
|
|
|
int n_ops = 0;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
struct dpif_flow flows[REVALIDATE_MAX_BATCH];
|
|
|
|
|
const struct dpif_flow *f;
|
|
|
|
|
int n_dumped;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
long long int max_idle;
|
|
|
|
|
long long int now;
|
2020-09-30 16:23:59 -03:00
|
|
|
|
size_t kill_all_limit;
|
2014-05-20 11:37:02 -07:00
|
|
|
|
size_t n_dp_flows;
|
|
|
|
|
bool kill_them_all;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
|
|
|
|
|
if (!n_dumped) {
|
|
|
|
|
break;
|
revalidator: Prevent handling the same flow twice.
When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.
This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.
The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."
This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.
We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-23 15:31:17 +12:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
/* In normal operation we want to keep flows around until they have
|
|
|
|
|
* been idle for 'ofproto_max_idle' milliseconds. However:
|
|
|
|
|
*
|
|
|
|
|
* - If the number of datapath flows climbs above 'flow_limit',
|
|
|
|
|
* drop that down to 100 ms to try to bring the flows down to
|
|
|
|
|
* the limit.
|
|
|
|
|
*
|
|
|
|
|
* - If the number of datapath flows climbs above twice
|
|
|
|
|
* 'flow_limit', delete all the datapath flows as an emergency
|
|
|
|
|
* measure. (We reassess this condition for the next batch of
|
|
|
|
|
* datapath flows, so we will recover before all the flows are
|
|
|
|
|
* gone.) */
|
|
|
|
|
n_dp_flows = udpif_get_n_flows(udpif);
|
2020-04-20 19:13:42 +05:30
|
|
|
|
if (n_dp_flows >= flow_limit) {
|
|
|
|
|
COVERAGE_INC(upcall_flow_limit_hit);
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-30 16:23:59 -03:00
|
|
|
|
kill_them_all = false;
|
|
|
|
|
kill_all_limit = flow_limit * 2;
|
|
|
|
|
if (OVS_UNLIKELY(n_dp_flows > kill_all_limit)) {
|
|
|
|
|
static struct vlog_rate_limit rlem = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
|
|
|
|
|
kill_them_all = true;
|
|
|
|
|
COVERAGE_INC(upcall_flow_limit_kill);
|
|
|
|
|
if (kill_warn_print) {
|
|
|
|
|
kill_warn_print = false;
|
|
|
|
|
VLOG_WARN_RL(&rlem,
|
|
|
|
|
"Number of datapath flows (%"PRIuSIZE") twice as high as "
|
|
|
|
|
"current dynamic flow limit (%"PRIuSIZE"). "
|
|
|
|
|
"Starting to delete flows unconditionally "
|
|
|
|
|
"as an emergency measure.", n_dp_flows, kill_all_limit);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;
|
|
|
|
|
|
2023-03-09 13:30:16 +01:00
|
|
|
|
udpif->dpif->current_ms = now = time_msec();
|
2014-05-20 11:37:02 -07:00
|
|
|
|
for (f = flows; f < &flows[n_dumped]; f++) {
|
|
|
|
|
long long int used = f->stats.used;
|
2015-11-25 15:19:37 -08:00
|
|
|
|
struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
enum flow_del_reason del_reason = FDR_NONE;
|
2020-06-04 13:47:00 +03:00
|
|
|
|
struct dpif_flow_stats stats = f->stats;
|
2015-08-03 18:43:53 -07:00
|
|
|
|
enum reval_result result;
|
2014-06-04 09:59:23 +00:00
|
|
|
|
struct udpif_key *ukey;
|
2015-08-03 18:43:53 -07:00
|
|
|
|
bool already_dumped;
|
2014-10-06 11:14:08 +13:00
|
|
|
|
int error;
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
if (ukey_acquire(udpif, f, &ukey, &error)) {
|
|
|
|
|
if (error == EBUSY) {
|
|
|
|
|
/* Another thread is processing this flow, so don't bother
|
|
|
|
|
                * processing it. */
|
|
|
|
|
COVERAGE_INC(upcall_ukey_contention);
|
|
|
|
|
} else {
|
|
|
|
|
log_unexpected_flow(f, error);
|
2014-12-10 11:20:10 -08:00
|
|
|
|
if (error != ENOENT) {
|
2014-12-16 17:44:40 -08:00
|
|
|
|
delete_op_init__(udpif, &ops[n_ops++], f);
|
2014-12-10 11:20:10 -08:00
|
|
|
|
}
|
2014-10-06 11:14:08 +13:00
|
|
|
|
}
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-27 16:29:26 +01:00
|
|
|
|
ukey->offloaded = f->attrs.offloaded;
|
|
|
|
|
if (!ukey->dp_layer
|
|
|
|
|
|| (!dpif_synced_dp_layers(udpif->dpif)
|
|
|
|
|
&& strcmp(ukey->dp_layer, f->attrs.dp_layer))) {
|
|
|
|
|
|
|
|
|
|
if (ukey->dp_layer) {
|
|
|
|
|
                /* The dp_layer has changed; this is probably due to an
|
|
|
|
|
* earlier revalidate cycle moving it to/from hw offload.
|
|
|
|
|
* In this case we should reset the ukey stored statistics,
|
|
|
|
|
* as they are from the deleted DP flow. */
|
|
|
|
|
COVERAGE_INC(ukey_dp_change);
|
|
|
|
|
memset(&ukey->stats, 0, sizeof ukey->stats);
|
|
|
|
|
}
|
|
|
|
|
ukey->dp_layer = f->attrs.dp_layer;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-14 16:17:25 +12:00
|
|
|
|
already_dumped = ukey->dump_seq == dump_seq;
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
if (already_dumped) {
|
2014-09-26 17:28:05 +00:00
|
|
|
|
/* The flow has already been handled during this flow dump
|
|
|
|
|
* operation. Skip it. */
|
|
|
|
|
if (ukey->xcache) {
|
|
|
|
|
COVERAGE_INC(dumped_duplicate_flow);
|
|
|
|
|
} else {
|
|
|
|
|
COVERAGE_INC(dumped_new_flow);
|
|
|
|
|
}
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-01 05:11:16 +00:00
|
|
|
|
if (ukey->state == UKEY_INCONSISTENT) {
|
|
|
|
|
ukey->dump_seq = dump_seq;
|
|
|
|
|
reval_op_init(&ops[n_ops++], UKEY_DELETE, udpif, ukey,
|
|
|
|
|
&recircs, &odp_actions);
|
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
|
|
|
|
COVERAGE_INC(dumped_inconsistent_flow);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-26 18:03:11 -07:00
|
|
|
|
if (ukey->state <= UKEY_OPERATIONAL) {
|
|
|
|
|
/* The flow is now confirmed to be in the datapath. */
|
|
|
|
|
transition_ukey(ukey, UKEY_OPERATIONAL);
|
|
|
|
|
} else {
|
2017-04-26 18:03:12 -07:00
|
|
|
|
VLOG_INFO("Unexpected ukey transition from state %d "
|
|
|
|
|
"(last transitioned from thread %u at %s)",
|
|
|
|
|
ukey->state, ukey->state_thread, ukey->state_where);
|
2017-04-26 18:03:11 -07:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2016-08-31 11:06:04 -07:00
|
|
|
|
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
if (!used) {
|
2020-06-04 13:47:00 +03:00
|
|
|
|
used = udpif_update_used(udpif, ukey, &stats);
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
}
|
2014-05-20 11:37:02 -07:00
|
|
|
|
if (kill_them_all || (used && used < now - max_idle)) {
|
2015-08-03 18:43:53 -07:00
|
|
|
|
result = UKEY_DELETE;
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
del_reason = (kill_them_all) ? FDR_FLOW_LIMIT : FDR_FLOW_IDLE;
|
2014-05-20 11:37:02 -07:00
|
|
|
|
} else {
|
2020-06-04 13:47:00 +03:00
|
|
|
|
result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
reval_seq, &recircs, &del_reason);
|
2014-05-20 11:37:02 -07:00
|
|
|
|
}
|
2014-05-14 16:17:25 +12:00
|
|
|
|
ukey->dump_seq = dump_seq;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2018-10-18 21:43:13 +05:30
|
|
|
|
if (netdev_is_offload_rebalance_policy_enabled() &&
|
|
|
|
|
result != UKEY_DELETE) {
|
|
|
|
|
udpif_update_flow_pps(udpif, ukey, f);
|
|
|
|
|
}
|
|
|
|
|
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
OVS_USDT_PROBE(revalidate, flow_result, udpif, ukey, result,
|
|
|
|
|
del_reason);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
if (result != UKEY_KEEP) {
|
|
|
|
|
/* Takes ownership of 'recircs'. */
|
|
|
|
|
reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
|
|
|
|
|
&odp_actions);
|
2014-05-20 11:37:02 -07:00
|
|
|
|
}
|
revalidator: Eliminate duplicate flow handling.
A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.
The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.
Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2014-05-28 15:23:42 +12:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
2014-02-11 13:55:33 -08:00
|
|
|
|
|
2014-05-20 11:37:02 -07:00
|
|
|
|
if (n_ops) {
|
2016-01-07 16:16:25 -08:00
|
|
|
|
/* Push datapath ops but defer ukey deletion to 'sweep' phase. */
|
|
|
|
|
push_dp_ops(udpif, ops, n_ops);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
}
|
2014-05-05 15:44:40 +12:00
|
|
|
|
ovsrcu_quiesce();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2014-05-20 11:37:02 -07:00
|
|
|
|
dpif_flow_dump_thread_destroy(dump_thread);
|
2015-08-03 18:43:53 -07:00
|
|
|
|
ofpbuf_uninit(&odp_actions);
|
2014-07-08 07:04:05 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-08-29 06:09:45 +00:00
|
|
|
|
/* Pauses the 'revalidator', can only proceed after main thread
|
|
|
|
|
* calls udpif_resume_revalidators(). */
|
|
|
|
|
static void
|
|
|
|
|
revalidator_pause(struct revalidator *revalidator)
|
|
|
|
|
{
|
|
|
|
|
/* The first block is for sync'ing the pause with main thread. */
|
|
|
|
|
ovs_barrier_block(&revalidator->udpif->pause_barrier);
|
|
|
|
|
/* The second block is for pausing until main thread resumes. */
|
|
|
|
|
ovs_barrier_block(&revalidator->udpif->pause_barrier);
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void
|
2014-02-11 13:55:36 -08:00
|
|
|
|
revalidator_sweep__(struct revalidator *revalidator, bool purge)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
2014-06-05 17:28:46 +12:00
|
|
|
|
struct udpif *udpif;
|
2014-07-25 13:54:24 +12:00
|
|
|
|
uint64_t dump_seq, reval_seq;
|
2014-06-05 17:28:46 +12:00
|
|
|
|
int slice;
|
2014-02-11 13:55:35 -08:00
|
|
|
|
|
2014-06-05 17:28:46 +12:00
|
|
|
|
udpif = revalidator->udpif;
|
|
|
|
|
dump_seq = seq_read(udpif->dump_seq);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
reval_seq = seq_read(udpif->reval_seq);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
slice = revalidator - udpif->revalidators;
|
|
|
|
|
ovs_assert(slice < udpif->n_revalidators);
|
|
|
|
|
|
|
|
|
|
for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
|
2015-08-03 18:43:53 -07:00
|
|
|
|
uint64_t odp_actions_stub[1024 / 8];
|
|
|
|
|
struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
|
|
|
|
|
|
2014-08-21 00:21:03 +12:00
|
|
|
|
struct ukey_op ops[REVALIDATE_MAX_BATCH];
|
2014-06-05 17:28:46 +12:00
|
|
|
|
struct udpif_key *ukey;
|
|
|
|
|
struct umap *umap = &udpif->ukeys[i];
|
|
|
|
|
size_t n_ops = 0;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-06-05 17:28:46 +12:00
|
|
|
|
CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
enum flow_del_reason del_reason = FDR_NONE;
|
2016-08-31 11:06:04 -07:00
|
|
|
|
enum ukey_state ukey_state;
|
2014-05-05 15:44:04 +12:00
|
|
|
|
|
2014-07-25 13:54:24 +12:00
|
|
|
|
/* Handler threads could be holding a ukey lock while it installs a
|
|
|
|
|
* new flow, so don't hang around waiting for access to it. */
|
|
|
|
|
if (ovs_mutex_trylock(&ukey->mutex)) {
|
2024-04-04 14:09:37 +02:00
|
|
|
|
COVERAGE_INC(upcall_ukey_contention);
|
2014-07-25 13:54:24 +12:00
|
|
|
|
continue;
|
|
|
|
|
}
|
2016-08-31 11:06:04 -07:00
|
|
|
|
ukey_state = ukey->state;
|
|
|
|
|
if (ukey_state == UKEY_OPERATIONAL
|
2023-07-01 05:11:16 +00:00
|
|
|
|
|| (ukey_state == UKEY_INCONSISTENT)
|
2016-08-31 11:06:04 -07:00
|
|
|
|
|| (ukey_state == UKEY_VISIBLE && purge)) {
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
|
|
|
|
|
bool seq_mismatch = (ukey->dump_seq != dump_seq
|
|
|
|
|
&& ukey->reval_seq != reval_seq);
|
|
|
|
|
enum reval_result result;
|
|
|
|
|
|
2023-07-01 05:11:16 +00:00
|
|
|
|
if (purge || ukey_state == UKEY_INCONSISTENT) {
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
result = UKEY_DELETE;
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
del_reason = purge ? FDR_PURGE : FDR_UPDATE_FAIL;
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
} else if (!seq_mismatch) {
|
|
|
|
|
result = UKEY_KEEP;
|
|
|
|
|
} else {
|
|
|
|
|
struct dpif_flow_stats stats;
|
|
|
|
|
COVERAGE_INC(revalidate_missed_dp_flow);
|
2023-01-05 13:56:59 +01:00
|
|
|
|
memcpy(&stats, &ukey->stats, sizeof stats);
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
reval_seq, &recircs, &del_reason);
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
}
|
2024-08-29 09:00:06 +02:00
|
|
|
|
|
|
|
|
|
if (ukey->dump_seq != dump_seq) {
|
|
|
|
|
ukey->missed_dumps++;
|
|
|
|
|
if (ukey->missed_dumps >= 4) {
|
|
|
|
|
/* If the flow was not dumped for 4 revalidator rounds,
|
|
|
|
|
* we can assume the datapath flow no longer exists
|
|
|
|
|
* and the ukey should be deleted. */
|
|
|
|
|
COVERAGE_INC(revalidate_missing_dp_flow);
|
|
|
|
|
del_reason = FDR_FLOW_MISSING_DP;
|
|
|
|
|
result = UKEY_DELETE;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
ukey->missed_dumps = 0;
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
if (result != UKEY_KEEP) {
|
|
|
|
|
/* Clears 'recircs' if filled by revalidate_ukey(). */
|
|
|
|
|
reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
|
|
|
|
|
&odp_actions);
|
|
|
|
|
}
|
revalidator: Add a USDT probe during flow deletion with purge reason.
During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.
Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.
This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed. Additionally, we track the
reason for the flow eviction and provide that information as well. With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.
This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).
Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.
Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2024-03-05 10:44:41 -05:00
|
|
|
|
OVS_USDT_PROBE(revalidator_sweep__, flow_sweep_result, udpif,
|
|
|
|
|
ukey, result, del_reason);
|
2015-08-03 18:43:53 -07:00
|
|
|
|
}
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
2015-08-03 18:43:53 -07:00
|
|
|
|
|
2016-08-31 11:06:04 -07:00
|
|
|
|
if (ukey_state == UKEY_EVICTED) {
|
2016-01-07 16:16:25 -08:00
|
|
|
|
/* The common flow deletion case involves deletion of the flow
|
|
|
|
|
* during the dump phase and ukey deletion here. */
|
2014-06-05 17:28:46 +12:00
|
|
|
|
ovs_mutex_lock(&umap->mutex);
|
|
|
|
|
ukey_delete(umap, ukey);
|
|
|
|
|
ovs_mutex_unlock(&umap->mutex);
|
2014-02-11 13:55:35 -08:00
|
|
|
|
}
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
|
|
|
|
|
if (n_ops == REVALIDATE_MAX_BATCH) {
|
2016-01-07 16:16:25 -08:00
|
|
|
|
/* Update/delete missed flows and clean up corresponding ukeys
|
|
|
|
|
* if necessary. */
|
ofproto-dpif-upcall: Simplify revalidator_sweep__().
Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
to be removed or updated. This depends on a variety of factors such as
whether the datapath is being flushed, whether individual flows were
recently dumped, and whether those flows are valid for the current
revalidation generation.
Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.
While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.
Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-07 11:47:47 -08:00
|
|
|
|
push_ukey_ops(udpif, umap, ops, n_ops);
|
|
|
|
|
n_ops = 0;
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2014-02-11 13:55:35 -08:00
|
|
|
|
|
2014-06-05 17:28:46 +12:00
|
|
|
|
if (n_ops) {
|
2014-08-21 00:21:03 +12:00
|
|
|
|
push_ukey_ops(udpif, umap, ops, n_ops);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
}
|
2015-08-03 18:43:53 -07:00
|
|
|
|
|
|
|
|
|
ofpbuf_uninit(&odp_actions);
|
2014-06-05 17:28:46 +12:00
|
|
|
|
ovsrcu_quiesce();
|
2014-02-11 13:55:35 -08:00
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-02-11 13:55:36 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
revalidator_sweep(struct revalidator *revalidator)
|
|
|
|
|
{
|
|
|
|
|
revalidator_sweep__(revalidator, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
revalidator_purge(struct revalidator *revalidator)
|
|
|
|
|
{
|
|
|
|
|
revalidator_sweep__(revalidator, true);
|
|
|
|
|
}
|
2015-08-25 16:36:46 -07:00
|
|
|
|
|
|
|
|
|
/* In reaction to dpif purge, purges all 'ukey's with same 'pmd_id'. */
|
|
|
|
|
static void
|
|
|
|
|
dp_purge_cb(void *aux, unsigned pmd_id)
|
2016-08-31 11:06:04 -07:00
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
2015-08-25 16:36:46 -07:00
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif = aux;
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
udpif_pause_revalidators(udpif);
|
|
|
|
|
for (i = 0; i < N_UMAPS; i++) {
|
|
|
|
|
struct ukey_op ops[REVALIDATE_MAX_BATCH];
|
|
|
|
|
struct udpif_key *ukey;
|
|
|
|
|
struct umap *umap = &udpif->ukeys[i];
|
|
|
|
|
size_t n_ops = 0;
|
|
|
|
|
|
|
|
|
|
CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
|
2016-08-31 11:06:04 -07:00
|
|
|
|
if (ukey->pmd_id == pmd_id) {
|
2015-08-25 16:36:46 -07:00
|
|
|
|
delete_op_init(udpif, &ops[n_ops++], ukey);
|
2016-08-31 11:06:04 -07:00
|
|
|
|
transition_ukey(ukey, UKEY_EVICTING);
|
|
|
|
|
|
2015-08-25 16:36:46 -07:00
|
|
|
|
if (n_ops == REVALIDATE_MAX_BATCH) {
|
|
|
|
|
push_ukey_ops(udpif, umap, ops, n_ops);
|
|
|
|
|
n_ops = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (n_ops) {
|
|
|
|
|
push_ukey_ops(udpif, umap, ops, n_ops);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ovsrcu_quiesce();
|
|
|
|
|
}
|
|
|
|
|
udpif_resume_revalidators(udpif);
|
|
|
|
|
}
|
2013-11-20 18:06:12 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
dpif-netlink: Fix issues of the offloaded flows counter.
The n_offloaded_flows counter is saved in dpif, and this is the first
one when ofproto is created. When flow operation is done by ovs-appctl
commands, such as, dpctl/add-flow, a new dpif is opened, and the
n_offloaded_flows in it can't be used. So, instead of using counter,
the number of offloaded flows is queried from each netdev, then sum
them up. To achieve this, a new API is added in netdev_flow_api to get
how many flows assigned to a netdev.
In order to get better performance, this number is calculated directly
from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc
takes much time if there are many flows offloaded.
Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 02:47:32 +00:00
|
|
|
|
uint64_t n_offloaded_flows;
|
2013-11-20 18:06:12 -08:00
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unsigned int flow_limit;
|
2014-10-06 11:14:08 +13:00
|
|
|
|
bool ufid_enabled;
|
2013-11-20 18:06:12 -08:00
|
|
|
|
size_t i;
|
|
|
|
|
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
|
2014-12-19 09:54:38 -08:00
|
|
|
|
ufid_enabled = udpif_use_ufid(udpif);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2013-11-20 18:06:12 -08:00
|
|
|
|
ds_put_format(&ds, "%s:\n", dpif_name(udpif->dpif));
|
2018-05-25 17:02:22 -07:00
|
|
|
|
ds_put_format(&ds, " flows : (current %lu)"
|
2013-09-24 13:39:56 -07:00
|
|
|
|
" (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
|
|
|
|
|
udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
|
dpif-netlink: Fix issues of the offloaded flows counter.
The n_offloaded_flows counter is saved in dpif, and this is the first
one when ofproto is created. When flow operation is done by ovs-appctl
commands, such as, dpctl/add-flow, a new dpif is opened, and the
n_offloaded_flows in it can't be used. So, instead of using counter,
the number of offloaded flows is queried from each netdev, then sum
them up. To achieve this, a new API is added in netdev_flow_api to get
how many flows assigned to a netdev.
In order to get better performance, this number is calculated directly
from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc
takes much time if there are many flows offloaded.
Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 02:47:32 +00:00
|
|
|
|
if (!dpif_get_n_offloaded_flows(udpif->dpif, &n_offloaded_flows)) {
|
|
|
|
|
ds_put_format(&ds, " offloaded flows : %"PRIu64"\n",
|
|
|
|
|
n_offloaded_flows);
|
|
|
|
|
}
|
2018-05-25 17:02:22 -07:00
|
|
|
|
ds_put_format(&ds, " dump duration : %lldms\n", udpif->dump_duration);
|
|
|
|
|
ds_put_format(&ds, " ufid enabled : ");
|
2014-10-06 11:14:08 +13:00
|
|
|
|
if (ufid_enabled) {
|
|
|
|
|
ds_put_format(&ds, "true\n");
|
|
|
|
|
} else {
|
|
|
|
|
ds_put_format(&ds, "false\n");
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
ds_put_char(&ds, '\n');
|
2014-06-05 17:28:46 +12:00
|
|
|
|
|
2022-01-25 18:18:56 +08:00
|
|
|
|
for (i = 0; i < udpif->n_revalidators; i++) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
struct revalidator *revalidator = &udpif->revalidators[i];
|
2014-06-05 17:28:46 +12:00
|
|
|
|
int j, elements = 0;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2022-01-25 18:18:56 +08:00
|
|
|
|
for (j = i; j < N_UMAPS; j += udpif->n_revalidators) {
|
2014-06-05 17:28:46 +12:00
|
|
|
|
elements += cmap_count(&udpif->ukeys[j].cmap);
|
|
|
|
|
}
|
2018-05-25 17:02:22 -07:00
|
|
|
|
ds_put_format(&ds, " %u: (keys %d)\n", revalidator->id, elements);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-11-20 18:06:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unixctl_command_reply(conn, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
/* Disable using the megaflows.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_store_relaxed(&enable_megaflows, false);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush_all_datapaths();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unixctl_command_reply(conn, "megaflows disabled");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Re-enable using megaflows.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_store_relaxed(&enable_megaflows, true);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush_all_datapaths();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unixctl_command_reply(conn, "megaflows enabled");
|
|
|
|
|
}
|
2014-02-06 09:49:19 -08:00
|
|
|
|
|
2014-10-06 11:14:08 +13:00
|
|
|
|
/* Disable skipping flow attributes during flow dump.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_disable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
2014-12-19 09:54:38 -08:00
|
|
|
|
atomic_store_relaxed(&enable_ufid, false);
|
2014-10-06 11:14:08 +13:00
|
|
|
|
unixctl_command_reply(conn, "Datapath dumping tersely using UFID disabled");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Re-enable skipping flow attributes during flow dump.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not documented
|
|
|
|
|
* in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_enable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
2014-12-19 09:54:38 -08:00
|
|
|
|
atomic_store_relaxed(&enable_ufid, true);
|
|
|
|
|
unixctl_command_reply(conn, "Datapath dumping tersely using UFID enabled "
|
|
|
|
|
"for supported datapaths");
|
2014-10-06 11:14:08 +13:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-06 09:49:19 -08:00
|
|
|
|
/* Set the flow limit.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_set_flow_limit(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
2016-12-19 14:18:25 -08:00
|
|
|
|
const char *argv[],
|
2014-02-06 09:49:19 -08:00
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
unsigned int flow_limit = atoi(argv[1]);
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
2014-08-29 10:34:53 -07:00
|
|
|
|
atomic_store_relaxed(&udpif->flow_limit, flow_limit);
|
2014-02-06 09:49:19 -08:00
|
|
|
|
}
|
|
|
|
|
ds_put_format(&ds, "set flow_limit to %u\n", flow_limit);
|
|
|
|
|
unixctl_command_reply(conn, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
2014-06-25 14:02:45 +00:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_dump_wait(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
2016-03-25 14:10:22 -07:00
|
|
|
|
if (ovs_list_is_singleton(&all_udpifs)) {
|
2014-09-15 10:10:34 -07:00
|
|
|
|
struct udpif *udpif = NULL;
|
2014-06-25 14:02:45 +00:00
|
|
|
|
size_t len;
|
|
|
|
|
|
2016-03-25 14:10:22 -07:00
|
|
|
|
udpif = OBJECT_CONTAINING(ovs_list_front(&all_udpifs), udpif, list_node);
|
2014-06-25 14:02:45 +00:00
|
|
|
|
len = (udpif->n_conns + 1) * sizeof *udpif->conns;
|
|
|
|
|
udpif->conn_seq = seq_read(udpif->dump_seq);
|
|
|
|
|
udpif->conns = xrealloc(udpif->conns, len);
|
|
|
|
|
udpif->conns[udpif->n_conns++] = conn;
|
|
|
|
|
} else {
|
|
|
|
|
unixctl_command_reply_error(conn, "can't wait on multiple udpifs.");
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-11-13 10:42:47 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_purge(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
bool wake_up = false;
|
2014-11-13 10:42:47 -08:00
|
|
|
|
int n;
|
|
|
|
|
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
if (!latch_is_set(&udpif->pause_latch)) {
|
|
|
|
|
udpif_pause_revalidators(udpif);
|
|
|
|
|
wake_up = true;
|
|
|
|
|
}
|
2014-11-13 10:42:47 -08:00
|
|
|
|
for (n = 0; n < udpif->n_revalidators; n++) {
|
|
|
|
|
revalidator_purge(&udpif->revalidators[n]);
|
|
|
|
|
}
|
ofproto-dpif-upcall: Pause revalidators when purging.
This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.
A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.
This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:
2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
packet_type(ns=0,id=0),
eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
failed to flow_get (No such file or directory)
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""
To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.
Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:
2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
failed to flow_get (No such file or directory)
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
Failed to acquire udpif_key corresponding to unexpected flow
(No such file or directory):
ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9
To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.
As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.
Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-10-18 16:23:53 +02:00
|
|
|
|
if (wake_up) {
|
|
|
|
|
udpif_resume_revalidators(udpif);
|
|
|
|
|
}
|
2014-11-13 10:42:47 -08:00
|
|
|
|
}
|
|
|
|
|
unixctl_command_reply(conn, "");
|
|
|
|
|
}
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
|
2022-09-13 21:08:51 +02:00
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_pause(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
|
|
|
|
udpif_pause_revalidators(udpif);
|
|
|
|
|
}
|
|
|
|
|
unixctl_command_reply(conn, "");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_resume(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
|
|
|
|
udpif_resume_revalidators(udpif);
|
|
|
|
|
}
|
|
|
|
|
unixctl_command_reply(conn, "");
|
|
|
|
|
}
|
|
|
|
|
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_ofproto_detrace(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
const char *key_s = argv[1];
|
2024-12-05 15:50:32 +01:00
|
|
|
|
const char *pmd_str = NULL;
|
|
|
|
|
unsigned int pmd_id;
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
ovs_u128 ufid;
|
|
|
|
|
|
|
|
|
|
if (odp_ufid_from_string(key_s, &ufid) <= 0) {
|
|
|
|
|
unixctl_command_reply_error(conn, "failed to parse ufid");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (argc == 3) {
|
2024-12-05 15:50:32 +01:00
|
|
|
|
pmd_str = argv[2];
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
if (!ovs_scan(pmd_str, "pmd=%d", &pmd_id)) {
|
|
|
|
|
unixctl_command_reply_error(conn,
|
|
|
|
|
"Invalid pmd argument format. "
|
|
|
|
|
"Expecting 'pmd=PMD-ID'");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
2024-12-05 15:50:32 +01:00
|
|
|
|
if (!pmd_str) {
|
|
|
|
|
const char *type = dpif_normalize_type(dpif_type(udpif->dpif));
|
|
|
|
|
|
|
|
|
|
pmd_id = !strcmp(type, "system") ? PMD_ID_NULL : NON_PMD_CORE_ID;
|
|
|
|
|
}
|
|
|
|
|
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
struct udpif_key *ukey = ukey_lookup(udpif, &ufid, pmd_id);
|
|
|
|
|
if (!ukey) {
|
2024-12-05 15:50:32 +01:00
|
|
|
|
ds_put_format(&ds, "UFID was not found for %s\n",
|
|
|
|
|
dpif_name(udpif->dpif));
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ovs_mutex_lock(&ukey->mutex);
|
|
|
|
|
/* It only makes sense to format rules for ukeys that are (still)
|
|
|
|
|
* in use. */
|
|
|
|
|
if ((ukey->state == UKEY_VISIBLE || ukey->state == UKEY_OPERATIONAL)
|
|
|
|
|
&& ukey->xcache) {
|
|
|
|
|
xlate_xcache_format(&ds, ukey->xcache);
|
2024-12-05 15:50:32 +01:00
|
|
|
|
} else {
|
|
|
|
|
ds_put_format(&ds, "Cache was not found for %s\n",
|
|
|
|
|
dpif_name(udpif->dpif));
|
ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.
The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.
$ ovs-appctl dpctl/dump-flows -m
flow-dump from pmd on cpu core: 7
ufid:7460db8f..., recirc_id(0), ....
b. dump related OpenFlow rules and groups:
$ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
cookie=0x0, table=1 priority=200,actions=group:1
group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
cookie=0x0, table=2 actions=output:1
The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL. That should be fine as all
other ukeys should not be relevant for the use case presented above.
This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-07-12 15:47:55 +02:00
|
|
|
|
}
|
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
|
|
|
|
}
|
|
|
|
|
unixctl_command_reply(conn, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-13 21:08:51 +02:00
|
|
|
|
|
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
2018-10-18 21:43:14 +05:30
|
|
|
|
/* Flows are sorted in the following order:
|
|
|
|
|
* netdev, flow state (offloaded/kernel path), flow_pps_rate.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
flow_compare_rebalance(const void *elem1, const void *elem2)
|
|
|
|
|
{
|
|
|
|
|
const struct udpif_key *f1 = *(struct udpif_key **)elem1;
|
|
|
|
|
const struct udpif_key *f2 = *(struct udpif_key **)elem2;
|
|
|
|
|
int64_t diff;
|
|
|
|
|
|
|
|
|
|
if (f1->in_netdev < f2->in_netdev) {
|
|
|
|
|
return -1;
|
|
|
|
|
} else if (f1->in_netdev > f2->in_netdev) {
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (f1->offloaded != f2->offloaded) {
|
|
|
|
|
return f2->offloaded - f1->offloaded;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
diff = (f1->offloaded == true) ?
|
|
|
|
|
f1->flow_pps_rate - f2->flow_pps_rate :
|
|
|
|
|
f2->flow_pps_rate - f1->flow_pps_rate;
|
|
|
|
|
|
|
|
|
|
return (diff < 0) ? -1 : 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Insert flows from pending array during rebalancing */
|
|
|
|
|
static int
|
|
|
|
|
rebalance_insert_pending(struct udpif *udpif, struct udpif_key **pending_flows,
|
|
|
|
|
int pending_count, int insert_count,
|
|
|
|
|
uint64_t rate_threshold)
|
|
|
|
|
{
|
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < pending_count; i++) {
|
|
|
|
|
struct udpif_key *flow = pending_flows[i];
|
|
|
|
|
int err;
|
|
|
|
|
|
|
|
|
|
/* Stop offloading pending flows if the insert count is
|
|
|
|
|
* reached and the flow rate is less than the threshold
|
|
|
|
|
*/
|
|
|
|
|
if (count >= insert_count && flow->flow_pps_rate < rate_threshold) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Offload the flow to netdev */
|
|
|
|
|
err = udpif_flow_program(udpif, flow, DPIF_OFFLOAD_ALWAYS);
|
|
|
|
|
|
|
|
|
|
if (err == ENOSPC) {
|
|
|
|
|
/* Stop if we are out of resources */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (err) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Offload succeeded; delete it from the kernel datapath */
|
|
|
|
|
udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_NEVER);
|
|
|
|
|
|
|
|
|
|
/* Change the state of the flow, adjust dpif counters */
|
|
|
|
|
flow->offloaded = true;
|
|
|
|
|
|
|
|
|
|
udpif_set_ukey_backlog_packets(flow);
|
|
|
|
|
count++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Remove flows from offloaded array during rebalancing */
|
|
|
|
|
static void
|
|
|
|
|
rebalance_remove_offloaded(struct udpif *udpif,
|
|
|
|
|
struct udpif_key **offloaded_flows,
|
|
|
|
|
int offload_count)
|
|
|
|
|
{
|
|
|
|
|
for (int i = 0; i < offload_count; i++) {
|
|
|
|
|
struct udpif_key *flow = offloaded_flows[i];
|
|
|
|
|
int err;
|
|
|
|
|
|
|
|
|
|
/* Install the flow into kernel path first */
|
|
|
|
|
err = udpif_flow_program(udpif, flow, DPIF_OFFLOAD_NEVER);
|
|
|
|
|
if (err) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Success; now remove offloaded flow from netdev */
|
|
|
|
|
err = udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_ALWAYS);
|
|
|
|
|
if (err) {
|
|
|
|
|
udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_NEVER);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
udpif_set_ukey_backlog_packets(flow);
|
|
|
|
|
flow->offloaded = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Rebalance offloaded flows on a netdev that's in OOR state.
|
|
|
|
|
*
|
|
|
|
|
* The rebalancing is done in two phases. In the first phase, we check if
|
|
|
|
|
* the pending flows can be offloaded (if some resources became available
|
|
|
|
|
* in the meantime) by trying to offload each pending flow. If all pending
|
|
|
|
|
* flows get successfully offloaded, the OOR state is cleared on the netdev
|
|
|
|
|
* and there's nothing to rebalance.
|
|
|
|
|
*
|
|
|
|
|
* If some of the pending flows could not be offloaded, i.e, we still see
|
|
|
|
|
* the OOR error, then we move to the second phase of rebalancing. In this
|
|
|
|
|
* phase, the rebalancer compares pps-rate of an offloaded flow with the
|
|
|
|
|
* least pps-rate with that of a pending flow with the highest pps-rate from
|
|
|
|
|
* their respective sorted arrays. If pps-rate of the offloaded flow is less
|
|
|
|
|
* than the pps-rate of the pending flow, then it deletes the offloaded flow
|
|
|
|
|
* from the HW/netdev and adds it to kernel datapath and then offloads pending
|
|
|
|
|
* to HW/netdev. This process is repeated for every pair of offloaded and
|
|
|
|
|
* pending flows in the ordered list. The process stops when we encounter an
|
|
|
|
|
* offloaded flow that has a higher pps-rate than the corresponding pending
|
|
|
|
|
* flow. The entire rebalancing process is repeated in the next iteration.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
rebalance_device(struct udpif *udpif, struct udpif_key **offloaded_flows,
|
|
|
|
|
int offload_count, struct udpif_key **pending_flows,
|
|
|
|
|
int pending_count)
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/* Phase 1 */
|
|
|
|
|
int num_inserted = rebalance_insert_pending(udpif, pending_flows,
|
|
|
|
|
pending_count, pending_count,
|
|
|
|
|
0);
|
|
|
|
|
if (num_inserted) {
|
|
|
|
|
VLOG_DBG("Offload rebalance: Phase1: inserted %d pending flows",
|
|
|
|
|
num_inserted);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Adjust pending array */
|
|
|
|
|
pending_flows = &pending_flows[num_inserted];
|
|
|
|
|
pending_count -= num_inserted;
|
|
|
|
|
|
|
|
|
|
if (!pending_count) {
|
|
|
|
|
/*
|
|
|
|
|
* Successfully offloaded all pending flows. The device
|
|
|
|
|
* is no longer in OOR state; done rebalancing this device.
|
|
|
|
|
*/
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Phase 2; determine how many offloaded flows to churn.
|
|
|
|
|
*/
|
|
|
|
|
#define OFFL_REBAL_MAX_CHURN 1024
|
|
|
|
|
int churn_count = 0;
|
|
|
|
|
while (churn_count < OFFL_REBAL_MAX_CHURN && churn_count < offload_count
|
|
|
|
|
&& churn_count < pending_count) {
|
|
|
|
|
if (pending_flows[churn_count]->flow_pps_rate <=
|
|
|
|
|
offloaded_flows[churn_count]->flow_pps_rate)
|
|
|
|
|
break;
|
|
|
|
|
churn_count++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (churn_count) {
|
|
|
|
|
VLOG_DBG("Offload rebalance: Phase2: removing %d offloaded flows",
|
|
|
|
|
churn_count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Bail early if nothing to churn */
|
|
|
|
|
if (!churn_count) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Remove offloaded flows */
|
|
|
|
|
rebalance_remove_offloaded(udpif, offloaded_flows, churn_count);
|
|
|
|
|
|
|
|
|
|
/* Adjust offloaded array */
|
|
|
|
|
offloaded_flows = &offloaded_flows[churn_count];
|
|
|
|
|
offload_count -= churn_count;
|
|
|
|
|
|
|
|
|
|
/* Replace offloaded flows with pending flows */
|
|
|
|
|
num_inserted = rebalance_insert_pending(udpif, pending_flows,
|
|
|
|
|
pending_count, churn_count,
|
|
|
|
|
offload_count ?
|
|
|
|
|
offloaded_flows[0]->flow_pps_rate :
|
|
|
|
|
0);
|
|
|
|
|
if (num_inserted) {
|
|
|
|
|
VLOG_DBG("Offload rebalance: Phase2: inserted %d pending flows",
|
|
|
|
|
num_inserted);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Appends 'ukey' to sort_flows[], growing the array as needed.
 * Returns the (possibly reallocated) array; the caller must keep using
 * the returned pointer. */
static struct udpif_key **
udpif_add_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
                    size_t *alloc_flow_count, struct udpif_key *ukey)
{
    size_t idx = (*total_flow_count)++;

    if (idx >= *alloc_flow_count) {
        /* Out of room: grow the allocation. */
        sort_flows = x2nrealloc(sort_flows, alloc_flow_count, sizeof ukey);
    }
    sort_flows[idx] = ukey;

    return sort_flows;
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Build sort_flows[] initially with flows that
|
|
|
|
|
* reference an 'OOR' netdev as their input port.
|
|
|
|
|
*/
|
|
|
|
|
static struct udpif_key **
|
|
|
|
|
udpif_build_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
|
|
|
|
|
size_t *alloc_flow_count, struct udpif_key *ukey,
|
|
|
|
|
int *oor_netdev_count)
|
|
|
|
|
{
|
|
|
|
|
struct netdev *netdev;
|
|
|
|
|
int count;
|
|
|
|
|
|
|
|
|
|
/* Input netdev must be available for the flow */
|
|
|
|
|
netdev = ukey->in_netdev;
|
|
|
|
|
if (!netdev) {
|
|
|
|
|
return sort_flows;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Is the in-netdev for this flow in OOR state ? */
|
|
|
|
|
if (!netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR)) {
|
|
|
|
|
ukey_netdev_unref(ukey);
|
|
|
|
|
return sort_flows;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Add the flow to sort_flows[] */
|
|
|
|
|
sort_flows = udpif_add_oor_flows(sort_flows, total_flow_count,
|
|
|
|
|
alloc_flow_count, ukey);
|
|
|
|
|
if (ukey->offloaded) {
|
|
|
|
|
count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT);
|
|
|
|
|
ovs_assert(count >= 0);
|
|
|
|
|
if (count++ == 0) {
|
|
|
|
|
(*oor_netdev_count)++;
|
|
|
|
|
}
|
|
|
|
|
netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, count);
|
|
|
|
|
} else {
|
|
|
|
|
count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT);
|
|
|
|
|
ovs_assert(count >= 0);
|
|
|
|
|
netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, ++count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return sort_flows;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Rebalance offloaded flows on HW netdevs that are in OOR state.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
udpif_flow_rebalance(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
struct udpif_key **sort_flows = NULL;
|
|
|
|
|
size_t alloc_flow_count = 0;
|
|
|
|
|
size_t total_flow_count = 0;
|
|
|
|
|
int oor_netdev_count = 0;
|
|
|
|
|
int offload_index = 0;
|
|
|
|
|
int pending_index;
|
|
|
|
|
|
|
|
|
|
/* Collect flows (offloaded and pending) that reference OOR netdevs */
|
|
|
|
|
for (size_t i = 0; i < N_UMAPS; i++) {
|
|
|
|
|
struct udpif_key *ukey;
|
|
|
|
|
struct umap *umap = &udpif->ukeys[i];
|
|
|
|
|
|
|
|
|
|
CMAP_FOR_EACH (ukey, cmap_node, &umap->cmap) {
|
|
|
|
|
ukey_to_flow_netdev(udpif, ukey);
|
|
|
|
|
sort_flows = udpif_build_oor_flows(sort_flows, &total_flow_count,
|
|
|
|
|
&alloc_flow_count, ukey,
|
|
|
|
|
&oor_netdev_count);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sort flows by OOR netdevs, state (offloaded/pending) and pps-rate */
|
|
|
|
|
qsort(sort_flows, total_flow_count, sizeof(struct udpif_key *),
|
|
|
|
|
flow_compare_rebalance);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We now have flows referencing OOR netdevs, that are sorted. We also
|
|
|
|
|
* have a count of offloaded and pending flows on each of the netdevs
|
|
|
|
|
* that are in OOR state. Now rebalance each oor-netdev.
|
|
|
|
|
*/
|
|
|
|
|
while (oor_netdev_count) {
|
|
|
|
|
struct netdev *netdev;
|
|
|
|
|
int offload_count;
|
|
|
|
|
int pending_count;
|
|
|
|
|
bool oor;
|
|
|
|
|
|
|
|
|
|
netdev = sort_flows[offload_index]->in_netdev;
|
|
|
|
|
ovs_assert(netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR) == true);
|
|
|
|
|
VLOG_DBG("Offload rebalance: netdev: %s is OOR", netdev->name);
|
|
|
|
|
|
|
|
|
|
offload_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT);
|
|
|
|
|
pending_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT);
|
|
|
|
|
pending_index = offload_index + offload_count;
|
|
|
|
|
|
|
|
|
|
oor = rebalance_device(udpif,
|
|
|
|
|
&sort_flows[offload_index], offload_count,
|
|
|
|
|
&sort_flows[pending_index], pending_count);
|
|
|
|
|
netdev_set_hw_info(netdev, HW_INFO_TYPE_OOR, oor);
|
|
|
|
|
|
|
|
|
|
offload_index = pending_index + pending_count;
|
|
|
|
|
netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, 0);
|
|
|
|
|
netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, 0);
|
|
|
|
|
oor_netdev_count--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < total_flow_count; i++) {
|
|
|
|
|
struct udpif_key *ukey = sort_flows[i];
|
|
|
|
|
ukey_netdev_unref(ukey);
|
|
|
|
|
}
|
|
|
|
|
free(sort_flows);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_offload_type offload_type)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_op *opsp;
|
|
|
|
|
struct ukey_op uop;
|
|
|
|
|
|
|
|
|
|
opsp = &uop.dop;
|
|
|
|
|
put_op_init(&uop, ukey, DPIF_FP_CREATE);
|
|
|
|
|
dpif_operate(udpif->dpif, &opsp, 1, offload_type);
|
|
|
|
|
|
|
|
|
|
return opsp->error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
|
|
|
|
|
enum dpif_offload_type offload_type)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_op *opsp;
|
|
|
|
|
struct ukey_op uop;
|
|
|
|
|
|
|
|
|
|
opsp = &uop.dop;
|
|
|
|
|
delete_op_init(udpif, &uop, ukey);
|
|
|
|
|
dpif_operate(udpif->dpif, &opsp, 1, offload_type);
|
|
|
|
|
|
|
|
|
|
return opsp->error;
|
|
|
|
|
}
|