2014-01-13 15:33:27 -08:00
|
|
|
|
/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
|
2013-06-25 14:45:43 -07:00
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License. */
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
#include "ofproto-dpif-upcall.h"
|
|
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
|
|
2013-10-22 16:16:31 -07:00
|
|
|
|
#include "connmgr.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "coverage.h"
|
|
|
|
|
#include "dpif.h"
|
2013-11-20 18:06:12 -08:00
|
|
|
|
#include "dynamic-string.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "fail-open.h"
|
2013-09-12 17:42:23 -07:00
|
|
|
|
#include "guarded-list.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "latch.h"
|
|
|
|
|
#include "list.h"
|
|
|
|
|
#include "netlink.h"
|
|
|
|
|
#include "ofpbuf.h"
|
2013-09-24 15:04:04 -07:00
|
|
|
|
#include "ofproto-dpif-ipfix.h"
|
|
|
|
|
#include "ofproto-dpif-sflow.h"
|
2013-09-24 13:39:56 -07:00
|
|
|
|
#include "ofproto-dpif-xlate.h"
|
2014-03-18 16:34:28 -07:00
|
|
|
|
#include "ovs-rcu.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "packets.h"
|
|
|
|
|
#include "poll-loop.h"
|
2013-11-20 18:06:12 -08:00
|
|
|
|
#include "seq.h"
|
|
|
|
|
#include "unixctl.h"
|
2013-06-25 14:45:43 -07:00
|
|
|
|
#include "vlog.h"
|
|
|
|
|
|
|
|
|
|
#define MAX_QUEUE_LENGTH 512
|
2013-09-24 13:39:56 -07:00
|
|
|
|
#define FLOW_MISS_MAX_BATCH 50
|
|
|
|
|
#define REVALIDATE_MAX_BATCH 50
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
|
|
|
|
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);
|
|
|
|
|
|
revalidator: Prevent handling the same flow twice.
When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.
This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.
The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."
This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.
We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-23 15:31:17 +12:00
|
|
|
|
COVERAGE_DEFINE(upcall_duplicate_flow);
|
|
|
|
|
|
2014-02-26 23:03:24 -08:00
|
|
|
|
/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id.  Index into the parent
                                        * udpif's 'handlers' array. */
};
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary. */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
    struct hmap *ukeys;                /* Points into udpif->ukeys for this
                                          revalidator. Used for GC phase. */
};
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kind of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintains
 *      them.
 */
struct udpif {
    struct list list_node;             /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    uint32_t secret;                   /* Random seed for upcall hash. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool need_revalidate;              /* As indicated by 'reval_seq'. */
    bool reval_exit;                   /* Set by leader on 'exit_latch. */
    pthread_barrier_t reval_barrier;   /* Barrier used by revalidators. */
    struct dpif_flow_dump dump;        /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */

    /* There are 'n_revalidators' ukey hmaps. Each revalidator retains a
     * reference to one of these for garbage collection.
     *
     * During the flow dump phase, revalidators insert into these with a random
     * distribution. During the garbage collection phase, each revalidator
     * takes care of garbage collecting one of these hmaps. */
    struct {
        struct ovs_mutex mutex;        /* Guards the following. */
        struct hmap hmap OVS_GUARDED;  /* Datapath flow keys. */
    } *ukeys;

    /* Datapath flow statistics. */
    unsigned int max_n_flows;
    unsigned int avg_n_flows;

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_ulong n_flows;              /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;
};
|
|
|
|
|
|
2013-09-24 15:04:04 -07:00
|
|
|
|
/* Classification of an upcall received from the datapath. */
enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss.  */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL                /* Per-bridge sampling. */
};
|
|
|
|
|
|
|
|
|
|
/* A single upcall received from the datapath, plus the memory backing it. */
struct upcall {
    struct flow_miss *flow_miss;        /* This upcall's flow_miss. */

    /* Raw upcall plus data for keeping track of the memory backing it. */
    struct dpif_upcall dpif_upcall;     /* As returned by dpif_recv() */
    struct ofpbuf upcall_buf;           /* Owns some data in 'dpif_upcall'. */
    uint64_t upcall_stub[512 / 8];      /* Buffer to reduce need for malloc(). */
};
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* 'udpif_key's are responsible for tracking the little bit of state udpif
 * needs to do flow expiration which can't be pulled directly from the
 * datapath.  They may be created or maintained by any revalidator during
 * the dump phase, but are owned by a single revalidator, and are destroyed
 * by that revalidator during the garbage-collection phase.
 *
 * While some elements of a udpif_key are protected by a mutex, the ukey itself
 * is not.  Therefore it is not safe to destroy a udpif_key except when all
 * revalidators are in garbage collection phase, or they aren't running. */
struct udpif_key {
    struct hmap_node hmap_node;     /* In parent revalidator 'ukeys' map. */

    /* These elements are read only once created, and therefore aren't
     * protected by a mutex. */
    const struct nlattr *key;       /* Datapath flow key. */
    size_t key_len;                 /* Length of 'key'. */

    struct ovs_mutex mutex;                   /* Guards the following. */
    struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
    long long int created OVS_GUARDED;        /* Estimate of creation time. */
    bool mark OVS_GUARDED;                    /* For mark and sweep garbage
                                                 collection. */
    bool flow_exists OVS_GUARDED;             /* Ensures flows are only deleted
                                                 once. */

    struct xlate_cache *xcache OVS_GUARDED;   /* Cache for xlate entries that
                                               * are affected by this ukey.
                                               * Used for stats and learning.*/
    struct odputil_keybuf key_buf;            /* Memory for 'key'. */
};
|
|
|
|
|
|
|
|
|
|
/* Flow miss batching.
 *
 * Some dpifs implement operations faster when you hand them off in a batch.
 * To allow batching, "struct flow_miss" queues the dpif-related work needed
 * for a given flow.  Each "struct flow_miss" corresponds to sending one or
 * more packets, plus possibly installing the flow in the dpif. */
struct flow_miss {
    struct hmap_node hmap_node;
    struct ofproto_dpif *ofproto;

    struct flow flow;
    const struct nlattr *key;
    size_t key_len;
    enum dpif_upcall_type upcall_type;
    struct dpif_flow_stats stats;
    odp_port_t odp_in_port;

    uint64_t slow_path_buf[128 / 8];
    struct odputil_keybuf mask_buf;

    struct xlate_out xout;

    bool put;                   /* True if this miss should result in a
                                 * datapath flow put operation. */
};
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
2013-11-20 18:06:12 -08:00
|
|
|
|
static struct list all_udpifs = LIST_INITIALIZER(&all_udpifs);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-02-26 23:03:24 -08:00
|
|
|
|
static size_t read_upcalls(struct handler *,
|
|
|
|
|
struct upcall upcalls[FLOW_MISS_MAX_BATCH],
|
|
|
|
|
struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH],
|
|
|
|
|
struct hmap *);
|
|
|
|
|
static void handle_upcalls(struct handler *, struct hmap *, struct upcall *,
|
|
|
|
|
size_t n_upcalls);
|
2014-04-21 17:31:11 -07:00
|
|
|
|
static void udpif_stop_threads(struct udpif *);
|
|
|
|
|
static void udpif_start_threads(struct udpif *, size_t n_handlers,
|
|
|
|
|
size_t n_revalidators);
|
2013-09-24 15:04:04 -07:00
|
|
|
|
static void *udpif_upcall_handler(void *);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void *udpif_revalidator(void *);
|
2014-05-14 16:19:34 +09:00
|
|
|
|
static unsigned long udpif_get_n_flows(struct udpif *);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
static void revalidate(struct revalidator *);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void revalidator_sweep(struct revalidator *);
|
2014-02-11 13:55:36 -08:00
|
|
|
|
static void revalidator_purge(struct revalidator *);
|
2013-11-20 18:06:12 -08:00
|
|
|
|
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
|
|
|
|
static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-02-06 09:49:19 -08:00
|
|
|
|
static void upcall_unixctl_set_flow_limit(struct unixctl_conn *conn, int argc,
|
|
|
|
|
const char *argv[], void *aux);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
|
|
|
|
|
static struct udpif_key *ukey_create(const struct nlattr *key, size_t key_len,
|
|
|
|
|
long long int used);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void ukey_delete(struct revalidator *, struct udpif_key *);
|
|
|
|
|
|
|
|
|
|
static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true);
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
|
|
|
|
/* Creates and returns a new udpif associated with 'backer' and 'dpif'.  The
 * first call also registers the upcall-related unixctl commands (guarded by
 * 'once' so registration happens exactly once per process).  The caller owns
 * the returned udpif and must eventually call udpif_destroy(). */
struct udpif *
udpif_create(struct dpif_backer *backer, struct dpif *dpif)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    struct udpif *udpif = xzalloc(sizeof *udpif);

    if (ovsthread_once_start(&once)) {
        unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show,
                                 NULL);
        unixctl_command_register("upcall/disable-megaflows", "", 0, 0,
                                 upcall_unixctl_disable_megaflows, NULL);
        unixctl_command_register("upcall/enable-megaflows", "", 0, 0,
                                 upcall_unixctl_enable_megaflows, NULL);
        unixctl_command_register("upcall/set-flow-limit", "", 1, 1,
                                 upcall_unixctl_set_flow_limit, NULL);
        ovsthread_once_done(&once);
    }

    udpif->dpif = dpif;
    udpif->backer = backer;
    atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
    udpif->secret = random_uint32();
    udpif->reval_seq = seq_create();
    udpif->dump_seq = seq_create();
    latch_init(&udpif->exit_latch);
    /* Track this udpif globally so udpif_flush_all_datapaths() can find it. */
    list_push_back(&all_udpifs, &udpif->list_node);
    atomic_init(&udpif->n_flows, 0);
    atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
    ovs_mutex_init(&udpif->n_flows_mutex);

    return udpif;
}
|
|
|
|
|
|
|
|
|
|
/* Stops all threads belonging to 'udpif', then frees 'udpif' and everything
 * it owns.  Threads must be stopped first so nothing touches the seqs,
 * latch, or mutex while they are being destroyed. */
void
udpif_destroy(struct udpif *udpif)
{
    udpif_stop_threads(udpif);

    list_remove(&udpif->list_node);
    latch_destroy(&udpif->exit_latch);
    seq_destroy(udpif->reval_seq);
    seq_destroy(udpif->dump_seq);
    ovs_mutex_destroy(&udpif->n_flows_mutex);
    free(udpif);
}
|
|
|
|
|
|
2014-04-21 17:31:11 -07:00
|
|
|
|
/* Stops the handler and revalidator threads, must be enclosed in
 * ovsrcu quiescent state unless when destroying udpif. */
static void
udpif_stop_threads(struct udpif *udpif)
{
    if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) {
        size_t i;

        /* Signal every child thread to exit, then join them all before
         * touching any state they might still be using. */
        latch_set(&udpif->exit_latch);

        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            xpthread_join(handler->thread, NULL);
        }

        for (i = 0; i < udpif->n_revalidators; i++) {
            xpthread_join(udpif->revalidators[i].thread, NULL);
        }

        /* Only after all threads have been joined is it safe to destroy the
         * per-revalidator ukey maps and their mutexes. */
        for (i = 0; i < udpif->n_revalidators; i++) {
            struct revalidator *revalidator = &udpif->revalidators[i];

            /* Delete ukeys, and delete all flows from the datapath to prevent
             * double-counting stats. */
            revalidator_purge(revalidator);

            hmap_destroy(&udpif->ukeys[i].hmap);
            ovs_mutex_destroy(&udpif->ukeys[i].mutex);
        }

        /* Clear the latch so a subsequent udpif_start_threads() starts with
         * the exit flag down. */
        latch_poll(&udpif->exit_latch);

        xpthread_barrier_destroy(&udpif->reval_barrier);

        free(udpif->revalidators);
        udpif->revalidators = NULL;
        udpif->n_revalidators = 0;

        free(udpif->handlers);
        udpif->handlers = NULL;
        udpif->n_handlers = 0;

        free(udpif->ukeys);
        udpif->ukeys = NULL;
    }
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-04-21 17:31:11 -07:00
|
|
|
|
/* Starts the handler and revalidator threads, must be enclosed in
 * ovsrcu quiescent state. */
static void
udpif_start_threads(struct udpif *udpif, size_t n_handlers,
                    size_t n_revalidators)
{
    if (udpif && n_handlers && n_revalidators) {
        size_t i;

        udpif->n_handlers = n_handlers;
        udpif->n_revalidators = n_revalidators;

        udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            handler->udpif = udpif;
            handler->handler_id = i;
            handler->thread = ovs_thread_create(
                "handler", udpif_upcall_handler, handler);
        }

        /* The barrier must be initialized before any revalidator thread is
         * started, since each revalidator waits on it. */
        xpthread_barrier_init(&udpif->reval_barrier, NULL,
                              udpif->n_revalidators);
        udpif->reval_exit = false;
        udpif->revalidators = xzalloc(udpif->n_revalidators
                                      * sizeof *udpif->revalidators);
        udpif->ukeys = xmalloc(sizeof *udpif->ukeys * n_revalidators);
        for (i = 0; i < udpif->n_revalidators; i++) {
            struct revalidator *revalidator = &udpif->revalidators[i];

            revalidator->udpif = udpif;
            /* Initialize this revalidator's ukey map and mutex before its
             * thread exists, so the thread never sees them uninitialized. */
            hmap_init(&udpif->ukeys[i].hmap);
            ovs_mutex_init(&udpif->ukeys[i].mutex);
            revalidator->ukeys = &udpif->ukeys[i].hmap;
            revalidator->thread = ovs_thread_create(
                "revalidator", udpif_revalidator, revalidator);
        }
    }
}
|
2014-03-18 16:34:28 -07:00
|
|
|
|
|
2014-04-21 17:31:11 -07:00
|
|
|
|
/* Tells 'udpif' how many threads it should use to handle upcalls.
|
|
|
|
|
* 'n_handlers' and 'n_revalidators' can never be zero. 'udpif''s
|
|
|
|
|
* datapath handle must have packet reception enabled before starting
|
|
|
|
|
* threads. */
|
|
|
|
|
void
|
|
|
|
|
udpif_set_threads(struct udpif *udpif, size_t n_handlers,
|
|
|
|
|
size_t n_revalidators)
|
|
|
|
|
{
|
2014-04-21 20:05:08 -07:00
|
|
|
|
ovs_assert(udpif);
|
2014-04-21 17:31:11 -07:00
|
|
|
|
ovs_assert(n_handlers && n_revalidators);
|
|
|
|
|
|
|
|
|
|
ovsrcu_quiesce_start();
|
2014-04-21 20:05:08 -07:00
|
|
|
|
if (udpif->n_handlers != n_handlers
|
|
|
|
|
|| udpif->n_revalidators != n_revalidators) {
|
|
|
|
|
udpif_stop_threads(udpif);
|
|
|
|
|
}
|
2014-04-21 17:31:11 -07:00
|
|
|
|
|
2014-04-21 20:05:08 -07:00
|
|
|
|
if (!udpif->handlers && !udpif->revalidators) {
|
2014-05-09 14:42:30 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
error = dpif_handlers_set(udpif->dpif, n_handlers);
|
|
|
|
|
if (error) {
|
|
|
|
|
VLOG_ERR("failed to configure handlers in dpif %s: %s",
|
|
|
|
|
dpif_name(udpif->dpif), ovs_strerror(error));
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-21 20:05:08 -07:00
|
|
|
|
udpif_start_threads(udpif, n_handlers, n_revalidators);
|
|
|
|
|
}
|
2014-03-18 16:34:28 -07:00
|
|
|
|
ovsrcu_quiesce_end();
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-25 08:01:01 -08:00
|
|
|
|
/* Waits for all ongoing upcall translations to complete. This ensures that
|
|
|
|
|
* there are no transient references to any removed ofprotos (or other
|
|
|
|
|
* objects). In particular, this should be called after an ofproto is removed
|
|
|
|
|
* (e.g. via xlate_remove_ofproto()) but before it is destroyed. */
|
|
|
|
|
void
|
|
|
|
|
udpif_synchronize(struct udpif *udpif)
|
|
|
|
|
{
|
|
|
|
|
/* This is stronger than necessary. It would be sufficient to ensure
|
|
|
|
|
* (somehow) that each handler and revalidator thread had passed through
|
|
|
|
|
* its main loop once. */
|
|
|
|
|
size_t n_handlers = udpif->n_handlers;
|
|
|
|
|
size_t n_revalidators = udpif->n_revalidators;
|
2014-04-21 17:31:11 -07:00
|
|
|
|
|
|
|
|
|
ovsrcu_quiesce_start();
|
|
|
|
|
udpif_stop_threads(udpif);
|
|
|
|
|
udpif_start_threads(udpif, n_handlers, n_revalidators);
|
|
|
|
|
ovsrcu_quiesce_end();
|
2014-02-25 08:01:01 -08:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
/* Notifies 'udpif' that something changed which may render previous
 * xlate_actions() results invalid. */
void
udpif_revalidate(struct udpif *udpif)
{
    /* Bumping 'reval_seq' is what revalidator threads watch for. */
    seq_change(udpif->reval_seq);
}
|
2013-09-12 17:42:23 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Returns a seq which increments every time 'udpif' pulls stats from the
 * datapath.  Callers can use this to get a sense of when might be a good time
 * to do periodic work which relies on relatively up to date statistics. */
struct seq *
udpif_dump_seq(struct udpif *udpif)
{
    return udpif->dump_seq;
}
|
|
|
|
|
|
2013-11-20 17:41:02 -08:00
|
|
|
|
void
|
|
|
|
|
udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
|
|
|
|
|
{
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
simap_increase(usage, "handlers", udpif->n_handlers);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
simap_increase(usage, "revalidators", udpif->n_revalidators);
|
|
|
|
|
for (i = 0; i < udpif->n_revalidators; i++) {
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_lock(&udpif->ukeys[i].mutex);
|
|
|
|
|
simap_increase(usage, "udpif keys", hmap_count(&udpif->ukeys[i].hmap));
|
|
|
|
|
ovs_mutex_unlock(&udpif->ukeys[i].mutex);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-11-20 17:41:02 -08:00
|
|
|
|
}
|
|
|
|
|
|
udpif: Bug fix udpif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not a problem, per se; it may be an
efficiency issue. However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in
case the datapath failed to execute the command. In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.
dpif_flush() was implemented as flushing flows of all datapaths while
most of its invocations should only flush the local datapath.
Only megaflow on/off commands should flush all datapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
/* Remove flows from a single datapath.
 *
 * The revalidator threads are stopped first so that their cached ukeys are
 * purged; flushing the datapath while ukeys still exist would cause the
 * revalidators to issue flow_del requests for flows that are already gone
 * (spurious "No such file or directory" warnings).  Threads are restarted
 * with the same counts afterwards. */
void
udpif_flush(struct udpif *udpif)
{
    size_t n_handlers, n_revalidators;

    n_handlers = udpif->n_handlers;
    n_revalidators = udpif->n_revalidators;

    ovsrcu_quiesce_start();

    udpif_stop_threads(udpif);
    dpif_flow_flush(udpif->dpif);
    udpif_start_threads(udpif, n_handlers, n_revalidators);

    ovsrcu_quiesce_end();
}
|
|
|
|
|
|
|
|
|
|
/* Removes all flows from all datapaths. */
|
|
|
|
|
static void
|
|
|
|
|
udpif_flush_all_datapaths(void)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush(udpif);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-05-14 16:19:34 +09:00
|
|
|
|
/* Returns the number of flows installed in 'udpif''s datapath.
 *
 * The underlying dpif_get_dp_stats() query is rate-limited: the cached count
 * in 'udpif->n_flows' is refreshed at most once per 100 ms, and only by the
 * single thread that wins the trylock on 'n_flows_mutex'.  All other callers
 * (and all calls within the 100 ms window) return the cached value, so this
 * function never blocks. */
static unsigned long
udpif_get_n_flows(struct udpif *udpif)
{
    long long int time, now;
    unsigned long flow_count;

    now = time_msec();
    atomic_read(&udpif->n_flows_timestamp, &time);
    if (time < now - 100 && !ovs_mutex_trylock(&udpif->n_flows_mutex)) {
        /* Cache is stale and we won the lock: refresh from the datapath. */
        struct dpif_dp_stats stats;

        /* Store the timestamp before the (possibly slow) stats query so
         * concurrent callers fall through to the cached value promptly. */
        atomic_store(&udpif->n_flows_timestamp, now);
        dpif_get_dp_stats(udpif->dpif, &stats);
        flow_count = stats.n_flows;
        atomic_store(&udpif->n_flows, flow_count);
        ovs_mutex_unlock(&udpif->n_flows_mutex);
    } else {
        /* Cache is fresh enough, or another thread is refreshing it. */
        atomic_read(&udpif->n_flows, &flow_count);
    }
    return flow_count;
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
|
2014-02-26 23:03:24 -08:00
|
|
|
|
/* The upcall handler thread tries to read a batch of FLOW_MISS_MAX_BATCH
 * upcalls from dpif, processes the batch and installs corresponding flows
 * in dpif.
 *
 * 'arg' is the thread's 'struct handler'.  The thread runs until the udpif's
 * exit latch is set, then returns NULL.  Per iteration, any translation
 * state ('misses' xouts) and upcall buffers are released before the next
 * batch is read, so 'misses' is always empty at the top of the loop. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;
    struct hmap misses = HMAP_INITIALIZER(&misses);

    while (!latch_is_set(&handler->udpif->exit_latch)) {
        struct upcall upcalls[FLOW_MISS_MAX_BATCH];
        struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH];
        struct flow_miss *miss;
        size_t n_upcalls, i;

        n_upcalls = read_upcalls(handler, upcalls, miss_buf, &misses);
        if (!n_upcalls) {
            /* Nothing to do: sleep until more upcalls arrive or we are asked
             * to exit. */
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
            poll_block();
        } else {
            handle_upcalls(handler, &misses, upcalls, n_upcalls);

            /* Release the xlate_out state created by handle_upcalls() for
             * each unique flow, then the per-upcall packet buffers. */
            HMAP_FOR_EACH (miss, hmap_node, &misses) {
                xlate_out_uninit(&miss->xout);
            }
            hmap_clear(&misses);
            for (i = 0; i < n_upcalls; i++) {
                ofpbuf_uninit(&upcalls[i].dpif_upcall.packet);
                ofpbuf_uninit(&upcalls[i].upcall_buf);
            }
        }
        coverage_clear();
    }
    hmap_destroy(&misses);

    return NULL;
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
/* Revalidator thread body.  'arg' is the thread's 'struct revalidator'.
 *
 * All revalidators of a udpif run this loop in lock-step, synchronized by
 * 'reval_barrier' three times per round:
 *
 *   1. After the leader (revalidators[0]) reads the revalidation seq, samples
 *      flow counts, checks the exit latch, and starts the flow dump.
 *   2. After every thread finishes revalidate(), before garbage collection.
 *   3. After every thread finishes revalidator_sweep().
 *
 * Only the leader checks the exit latch and adjusts the datapath flow limit;
 * the followers read 'udpif->reval_exit' after barrier 1 so that all threads
 * agree on whether to exit.  Returns NULL on exit. */
static void *
udpif_revalidator(void *arg)
{
    /* Used by all revalidators. */
    struct revalidator *revalidator = arg;
    struct udpif *udpif = revalidator->udpif;
    bool leader = revalidator == &udpif->revalidators[0];

    /* Used only by the leader. */
    long long int start_time = 0;
    uint64_t last_reval_seq = 0;
    unsigned int flow_limit = 0;
    size_t n_flows = 0;

    revalidator->id = ovsthread_id_self();
    for (;;) {
        if (leader) {
            uint64_t reval_seq;

            /* A changed reval_seq means configuration changed and every flow
             * must be re-translated this round. */
            reval_seq = seq_read(udpif->reval_seq);
            udpif->need_revalidate = last_reval_seq != reval_seq;
            last_reval_seq = reval_seq;

            n_flows = udpif_get_n_flows(udpif);
            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;

            /* Only the leader checks the exit latch to prevent a race where
             * some threads think it's true and exit and others think it's
             * false and block indefinitely on the reval_barrier */
            udpif->reval_exit = latch_is_set(&udpif->exit_latch);

            start_time = time_msec();
            if (!udpif->reval_exit) {
                dpif_flow_dump_start(&udpif->dump, udpif->dpif);
            }
        }

        /* Wait for the leader to start the flow dump. */
        xpthread_barrier_wait(&udpif->reval_barrier);
        if (udpif->reval_exit) {
            break;
        }
        revalidate(revalidator);

        /* Wait for all flows to have been dumped before we garbage collect. */
        xpthread_barrier_wait(&udpif->reval_barrier);
        revalidator_sweep(revalidator);

        /* Wait for all revalidators to finish garbage collection. */
        xpthread_barrier_wait(&udpif->reval_barrier);

        if (leader) {
            long long int duration;

            dpif_flow_dump_done(&udpif->dump);
            seq_change(udpif->dump_seq);

            /* Adapt the datapath flow limit to how long the dump took:
             * shrink it when dumps are slow, grow it when they are fast and
             * the datapath is busy.  Clamped to [1000, ofproto_flow_limit]. */
            duration = MAX(time_msec() - start_time, 1);
            atomic_read(&udpif->flow_limit, &flow_limit);
            udpif->dump_duration = duration;
            if (duration > 2000) {
                flow_limit /= duration / 1000;
            } else if (duration > 1300) {
                flow_limit = flow_limit * 3 / 4;
            } else if (duration < 1000 && n_flows > 2000
                       && flow_limit < n_flows * 1000 / duration) {
                flow_limit += 1000;
            }
            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
            atomic_store(&udpif->flow_limit, flow_limit);

            if (duration > 2000) {
                VLOG_INFO("Spent an unreasonably long %lldms dumping flows",
                          duration);
            }

            /* Sleep until the next round is due, the configuration changes,
             * or we are asked to exit. */
            poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
            seq_wait(udpif->reval_seq, last_reval_seq);
            latch_wait(&udpif->exit_latch);
            poll_block();
        }
    }

    return NULL;
}
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
/* Classifies 'upcall' as MISS_UPCALL, SFLOW_UPCALL, IPFIX_UPCALL,
 * FLOW_SAMPLE_UPCALL, or BAD_UPCALL.
 *
 * Datapath "miss" upcalls classify immediately.  "Action" upcalls are
 * distinguished by the user-action cookie embedded in their userdata
 * attribute: the cookie's size and 'type' field together identify the
 * sub-kind.  Malformed upcalls classify as BAD_UPCALL with a rate-limited
 * warning. */
static enum upcall_type
classify_upcall(const struct upcall *upcall)
{
    const struct dpif_upcall *dpif_upcall = &upcall->dpif_upcall;
    union user_action_cookie cookie;
    size_t userdata_len;

    /* First look at the upcall type. */
    switch (dpif_upcall->type) {
    case DPIF_UC_ACTION:
        break;

    case DPIF_UC_MISS:
        return MISS_UPCALL;

    case DPIF_N_UC_TYPES:
    default:
        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32,
                     dpif_upcall->type);
        return BAD_UPCALL;
    }

    /* "action" upcalls need a closer look. */
    if (!dpif_upcall->userdata) {
        VLOG_WARN_RL(&rl, "action upcall missing cookie");
        return BAD_UPCALL;
    }
    userdata_len = nl_attr_get_size(dpif_upcall->userdata);
    if (userdata_len < sizeof cookie.type
        || userdata_len > sizeof cookie) {
        VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
                     userdata_len);
        return BAD_UPCALL;
    }
    /* Copy only what was sent; the rest of 'cookie' stays zeroed. */
    memset(&cookie, 0, sizeof cookie);
    memcpy(&cookie, nl_attr_get(dpif_upcall->userdata), userdata_len);
    /* Each sub-kind must match both the cookie type and the exact cookie
     * size for that type (padded to a minimum of 8 bytes). */
    if (userdata_len == MAX(8, sizeof cookie.sflow)
        && cookie.type == USER_ACTION_COOKIE_SFLOW) {
        return SFLOW_UPCALL;
    } else if (userdata_len == MAX(8, sizeof cookie.slow_path)
               && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
        return MISS_UPCALL;
    } else if (userdata_len == MAX(8, sizeof cookie.flow_sample)
               && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
        return FLOW_SAMPLE_UPCALL;
    } else if (userdata_len == MAX(8, sizeof cookie.ipfix)
               && cookie.type == USER_ACTION_COOKIE_IPFIX) {
        return IPFIX_UPCALL;
    } else {
        VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
                     " and size %"PRIuSIZE, cookie.type, userdata_len);
        return BAD_UPCALL;
    }
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Calculates slow path actions for 'xout'. 'buf' must statically be
|
|
|
|
|
* initialized with at least 128 bytes of space. */
|
|
|
|
|
static void
|
|
|
|
|
compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
|
2014-02-26 23:03:24 -08:00
|
|
|
|
struct flow *flow, odp_port_t odp_in_port,
|
|
|
|
|
struct ofpbuf *buf)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
|
|
|
|
union user_action_cookie cookie;
|
|
|
|
|
odp_port_t port;
|
|
|
|
|
uint32_t pid;
|
|
|
|
|
|
|
|
|
|
cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
|
|
|
|
|
cookie.slow_path.unused = 0;
|
|
|
|
|
cookie.slow_path.reason = xout->slow;
|
|
|
|
|
|
|
|
|
|
port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
|
|
|
|
|
? ODPP_NONE
|
|
|
|
|
: odp_in_port;
|
2014-02-26 23:03:24 -08:00
|
|
|
|
pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0));
|
2013-09-24 13:39:56 -07:00
|
|
|
|
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, buf);
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-25 14:45:43 -07:00
|
|
|
|
static struct flow_miss *
|
|
|
|
|
flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto,
|
|
|
|
|
const struct flow *flow, uint32_t hash)
|
|
|
|
|
{
|
|
|
|
|
struct flow_miss *miss;
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) {
|
|
|
|
|
if (miss->ofproto == ofproto && flow_equal(&miss->flow, flow)) {
|
|
|
|
|
return miss;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-26 23:03:24 -08:00
|
|
|
|
/* Reads and classifies upcalls.  Returns the number of upcalls successfully
 * read.
 *
 * MISS_UPCALLs are accumulated: each is linked (via 'upcall->flow_miss') to a
 * 'struct flow_miss' allocated from 'miss_buf' and deduplicated through
 * 'misses', so multiple packets of the same flow share one flow_miss whose
 * stats are aggregated here.  Non-miss upcalls (sFlow/IPFIX/flow-sample) are
 * fully handled and their buffers released inside this function; only miss
 * upcalls survive in 'upcalls' for the caller to process. */
static size_t
read_upcalls(struct handler *handler,
             struct upcall upcalls[FLOW_MISS_MAX_BATCH],
             struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH],
             struct hmap *misses)
{
    struct udpif *udpif = handler->udpif;
    size_t i;
    size_t n_misses = 0;
    size_t n_upcalls = 0;

    /*
     * Try reading FLOW_MISS_MAX_BATCH upcalls from dpif.
     *
     * Extract the flow from each upcall.  Construct in 'misses' a hash table
     * that maps each unique flow to a 'struct flow_miss'.
     *
     * Most commonly there is a single packet per flow_miss, but there are
     * several reasons why there might be more than one, e.g.:
     *
     *   - The dpif packet interface does not support TSO (or UFO, etc.), so a
     *     large packet sent to userspace is split into a sequence of smaller
     *     ones.
     *
     *   - A stream of quickly arriving packets in an established "slow-pathed"
     *     flow.
     *
     *   - Rarely, a stream of quickly arriving packets in a flow not yet
     *     established.  (This is rare because most protocols do not send
     *     multiple back-to-back packets before receiving a reply from the
     *     other end of the connection, which gives OVS a chance to set up a
     *     datapath flow.)
     */
    for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) {
        struct upcall *upcall = &upcalls[n_upcalls];
        struct flow_miss *miss = &miss_buf[n_misses];
        struct dpif_upcall *dupcall;
        struct ofpbuf *packet;
        struct flow_miss *existing_miss;
        struct ofproto_dpif *ofproto;
        struct dpif_sflow *sflow;
        struct dpif_ipfix *ipfix;
        struct flow flow;
        enum upcall_type type;
        odp_port_t odp_in_port;
        int error;

        ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
                        sizeof upcall->upcall_stub);
        error = dpif_recv(udpif->dpif, handler->handler_id,
                          &upcall->dpif_upcall, &upcall->upcall_buf);
        if (error) {
            /* No more upcalls available right now. */
            ofpbuf_uninit(&upcall->upcall_buf);
            break;
        }

        dupcall = &upcall->dpif_upcall;
        packet = &dupcall->packet;
        error = xlate_receive(udpif->backer, packet, dupcall->key,
                              dupcall->key_len, &flow,
                              &ofproto, &ipfix, &sflow, NULL, &odp_in_port);
        if (error) {
            if (error == ENODEV) {
                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently.  Install a drop flow
                 * so that future packets of the flow are inexpensively dropped
                 * in the kernel. */
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32, odp_in_port);
                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
                              dupcall->key, dupcall->key_len, NULL, 0, NULL, 0,
                              NULL);
            }
            goto destroy_upcall;
        }

        type = classify_upcall(upcall);
        if (type == MISS_UPCALL) {
            uint32_t hash;
            struct pkt_metadata md = pkt_metadata_from_flow(&flow);

            flow_extract(packet, &md, &miss->flow);
            hash = flow_hash(&miss->flow, 0);
            existing_miss = flow_miss_find(misses, ofproto, &miss->flow,
                                           hash);
            if (!existing_miss) {
                /* First packet of this flow in the batch: initialize a new
                 * flow_miss from 'miss_buf'. */
                hmap_insert(misses, &miss->hmap_node, hash);
                miss->ofproto = ofproto;
                miss->key = dupcall->key;
                miss->key_len = dupcall->key_len;
                miss->upcall_type = dupcall->type;
                miss->stats.n_packets = 0;
                miss->stats.n_bytes = 0;
                miss->stats.used = time_msec();
                miss->stats.tcp_flags = 0;
                miss->odp_in_port = odp_in_port;
                miss->put = false;
                n_misses++;
            } else {
                miss = existing_miss;
            }
            /* Aggregate this packet into the (possibly shared) miss. */
            miss->stats.tcp_flags |= ntohs(miss->flow.tcp_flags);
            miss->stats.n_bytes += ofpbuf_size(packet);
            miss->stats.n_packets++;

            upcall->flow_miss = miss;
            n_upcalls++;
            continue;
        }

        /* Non-miss upcalls are handled right here; their buffers are freed
         * at 'destroy_upcall' below. */
        switch (type) {
        case SFLOW_UPCALL:
            if (sflow) {
                union user_action_cookie cookie;

                memset(&cookie, 0, sizeof cookie);
                memcpy(&cookie, nl_attr_get(dupcall->userdata),
                       sizeof cookie.sflow);
                dpif_sflow_received(sflow, packet, &flow, odp_in_port,
                                    &cookie);
            }
            break;
        case IPFIX_UPCALL:
            if (ipfix) {
                dpif_ipfix_bridge_sample(ipfix, packet, &flow);
            }
            break;
        case FLOW_SAMPLE_UPCALL:
            if (ipfix) {
                union user_action_cookie cookie;

                memset(&cookie, 0, sizeof cookie);
                memcpy(&cookie, nl_attr_get(dupcall->userdata),
                       sizeof cookie.flow_sample);

                /* The flow reflects exactly the contents of the packet.
                 * Sample the packet using it. */
                dpif_ipfix_flow_sample(ipfix, packet, &flow,
                                       cookie.flow_sample.collector_set_id,
                                       cookie.flow_sample.probability,
                                       cookie.flow_sample.obs_domain_id,
                                       cookie.flow_sample.obs_point_id);
            }
            break;
        case BAD_UPCALL:
            break;
        case MISS_UPCALL:
            OVS_NOT_REACHED();
        }

        dpif_ipfix_unref(ipfix);
        dpif_sflow_unref(sflow);

destroy_upcall:
        ofpbuf_uninit(&upcall->dpif_upcall.packet);
        ofpbuf_uninit(&upcall->upcall_buf);
    }

    return n_upcalls;
}
|
|
|
|
|
|
|
|
|
|
/* Processes a batch of 'n_upcalls' miss upcalls in 'upcalls', whose unique
 * flows are collected in 'misses' (as built by read_upcalls()).
 *
 * Translates each unique flow once, then builds and executes a batch of
 * datapath operations: DPIF_OP_FLOW_PUT to install flows (subject to the
 * flow limit and once per flow) and DPIF_OP_EXECUTE to forward the
 * individual packets.  In fail-open mode, every packet is also sent to the
 * controller as a packet-in.
 *
 * Caller retains ownership of the upcall buffers and must later release each
 * miss's xout via xlate_out_uninit(). */
static void
handle_upcalls(struct handler *handler, struct hmap *misses,
               struct upcall *upcalls, size_t n_upcalls)
{
    struct udpif *udpif = handler->udpif;
    struct dpif_op *opsp[FLOW_MISS_MAX_BATCH * 2];
    struct dpif_op ops[FLOW_MISS_MAX_BATCH * 2];
    struct flow_miss *miss;
    size_t n_ops, i;
    unsigned int flow_limit;
    bool fail_open, may_put;

    /* Only install new datapath flows while under the configured limit. */
    atomic_read(&udpif->flow_limit, &flow_limit);
    may_put = udpif_get_n_flows(udpif) < flow_limit;

    /* Initialize each 'struct flow_miss's ->xout.
     *
     * We do this per-flow_miss rather than per-packet because, most commonly,
     * all the packets in a flow can use the same translation.
     *
     * We can't do this in the previous loop because we need the TCP flags for
     * all the packets in each miss. */
    fail_open = false;
    HMAP_FOR_EACH (miss, hmap_node, misses) {
        struct xlate_in xin;

        xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL,
                      miss->stats.tcp_flags, NULL);
        xin.may_learn = true;

        if (miss->upcall_type == DPIF_UC_MISS) {
            xin.resubmit_stats = &miss->stats;
        } else {
            /* For non-miss upcalls, there's a flow in the datapath which this
             * packet was accounted to.  Presumably the revalidators will deal
             * with pushing its stats eventually. */
        }

        xlate_actions(&xin, &miss->xout);
        fail_open = fail_open || miss->xout.fail_open;
    }

    /* Now handle the packets individually in order of arrival.  In the common
     * case each packet of a miss can share the same actions, but slow-pathed
     * packets need to be translated individually:
     *
     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, and SLOW_BFD, translation is what
     *     processes received packets for these protocols.
     *
     *   - For SLOW_CONTROLLER, translation sends the packet to the OpenFlow
     *     controller.
     *
     * The loop fills 'ops' with an array of operations to execute in the
     * datapath. */
    n_ops = 0;
    for (i = 0; i < n_upcalls; i++) {
        struct upcall *upcall = &upcalls[i];
        struct flow_miss *miss = upcall->flow_miss;
        struct ofpbuf *packet = &upcall->dpif_upcall.packet;
        struct dpif_op *op;
        ovs_be16 flow_vlan_tci;

        /* Save a copy of flow.vlan_tci in case it is changed to
         * generate proper mega flow masks for VLAN splinter flows. */
        flow_vlan_tci = miss->flow.vlan_tci;

        if (miss->xout.slow) {
            struct xlate_in xin;

            /* Slow-pathed packets must be translated individually for their
             * side effects (protocol processing, controller packet-ins). */
            xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, 0, packet);
            xlate_actions_for_side_effects(&xin);
        }

        if (miss->flow.in_port.ofp_port
            != vsp_realdev_to_vlandev(miss->ofproto,
                                      miss->flow.in_port.ofp_port,
                                      miss->flow.vlan_tci)) {
            /* This packet was received on a VLAN splinter port.  We
             * added a VLAN to the packet to make the packet resemble
             * the flow, but the actions were composed assuming that
             * the packet contained no VLAN.  So, we must remove the
             * VLAN header from the packet before trying to execute the
             * actions. */
            if (ofpbuf_size(&miss->xout.odp_actions)) {
                eth_pop_vlan(packet);
            }

            /* Remove the flow vlan tags inserted by vlan splinter logic
             * to ensure megaflow masks generated match the data path flow. */
            miss->flow.vlan_tci = 0;
        }

        /* Do not install a flow into the datapath if:
         *
         *    - The datapath already has too many flows.
         *
         *    - An earlier iteration of this loop already put the same flow.
         *
         *    - We received this packet via some flow installed in the kernel
         *      already. */
        if (may_put
            && !miss->put
            && upcall->dpif_upcall.type == DPIF_UC_MISS) {
            struct ofpbuf mask;
            bool megaflow;

            miss->put = true;

            atomic_read(&enable_megaflows, &megaflow);
            ofpbuf_use_stack(&mask, &miss->mask_buf, sizeof miss->mask_buf);
            if (megaflow) {
                size_t max_mpls;
                bool recirc;

                recirc = ofproto_dpif_get_enable_recirc(miss->ofproto);
                max_mpls = ofproto_dpif_get_max_mpls_depth(miss->ofproto);
                odp_flow_key_from_mask(&mask, &miss->xout.wc.masks,
                                       &miss->flow, UINT32_MAX, max_mpls,
                                       recirc);
            }

            op = &ops[n_ops++];
            op->type = DPIF_OP_FLOW_PUT;
            op->u.flow_put.flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
            op->u.flow_put.key = miss->key;
            op->u.flow_put.key_len = miss->key_len;
            op->u.flow_put.mask = ofpbuf_data(&mask);
            op->u.flow_put.mask_len = ofpbuf_size(&mask);
            op->u.flow_put.stats = NULL;

            if (!miss->xout.slow) {
                op->u.flow_put.actions = ofpbuf_data(&miss->xout.odp_actions);
                op->u.flow_put.actions_len = ofpbuf_size(&miss->xout.odp_actions);
            } else {
                struct ofpbuf buf;

                /* Install a slow-path action so future packets of this flow
                 * come back to userspace. */
                ofpbuf_use_stack(&buf, miss->slow_path_buf,
                                 sizeof miss->slow_path_buf);
                compose_slow_path(udpif, &miss->xout, &miss->flow,
                                  miss->odp_in_port, &buf);
                op->u.flow_put.actions = ofpbuf_data(&buf);
                op->u.flow_put.actions_len = ofpbuf_size(&buf);
            }
        }

        /*
         * The 'miss' may be shared by multiple upcalls.  Restore
         * the saved flow vlan_tci field before processing the next
         * upcall. */
        miss->flow.vlan_tci = flow_vlan_tci;

        if (ofpbuf_size(&miss->xout.odp_actions)) {

            op = &ops[n_ops++];
            op->type = DPIF_OP_EXECUTE;
            op->u.execute.packet = packet;
            odp_key_to_pkt_metadata(miss->key, miss->key_len,
                                    &op->u.execute.md);
            op->u.execute.actions = ofpbuf_data(&miss->xout.odp_actions);
            op->u.execute.actions_len = ofpbuf_size(&miss->xout.odp_actions);
            op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0;
        }
    }

    /* Special case for fail-open mode.
     *
     * If we are in fail-open mode, but we are connected to a controller too,
     * then we should send the packet up to the controller in the hope that it
     * will try to set up a flow and thereby allow us to exit fail-open.
     *
     * See the top-level comment in fail-open.c for more information.
     *
     * Copy packets before they are modified by execution. */
    if (fail_open) {
        for (i = 0; i < n_upcalls; i++) {
            struct upcall *upcall = &upcalls[i];
            struct flow_miss *miss = upcall->flow_miss;
            struct ofpbuf *packet = &upcall->dpif_upcall.packet;
            struct ofproto_packet_in *pin;

            pin = xmalloc(sizeof *pin);
            pin->up.packet = xmemdup(ofpbuf_data(packet), ofpbuf_size(packet));
            pin->up.packet_len = ofpbuf_size(packet);
            pin->up.reason = OFPR_NO_MATCH;
            pin->up.table_id = 0;
            pin->up.cookie = OVS_BE64_MAX;
            flow_get_metadata(&miss->flow, &pin->up.fmd);
            pin->send_len = 0; /* Not used for flow table misses. */
            pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
            ofproto_dpif_send_packet_in(miss->ofproto, pin);
        }
    }

    /* Execute batch. */
    for (i = 0; i < n_ops; i++) {
        opsp[i] = &ops[i];
    }
    dpif_operate(udpif->dpif, opsp, n_ops);
}
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* Must be called with udpif->ukeys[hash % udpif->n_revalidators].mutex. */
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static struct udpif_key *
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ukey_lookup__(struct udpif *udpif, const struct nlattr *key, size_t key_len,
|
|
|
|
|
uint32_t hash)
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
|
|
|
|
struct udpif_key *ukey;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
struct hmap *hmap = &udpif->ukeys[hash % udpif->n_revalidators].hmap;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
HMAP_FOR_EACH_WITH_HASH (ukey, hmap_node, hash, hmap) {
|
|
|
|
|
if (ukey->key_len == key_len && !memcmp(ukey->key, key, key_len)) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
return ukey;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
static struct udpif_key *
|
|
|
|
|
ukey_lookup(struct udpif *udpif, const struct nlattr *key, size_t key_len,
|
|
|
|
|
uint32_t hash)
|
|
|
|
|
{
|
|
|
|
|
struct udpif_key *ukey;
|
|
|
|
|
uint32_t idx = hash % udpif->n_revalidators;
|
|
|
|
|
|
|
|
|
|
ovs_mutex_lock(&udpif->ukeys[idx].mutex);
|
|
|
|
|
ukey = ukey_lookup__(udpif, key, key_len, hash);
|
|
|
|
|
ovs_mutex_unlock(&udpif->ukeys[idx].mutex);
|
|
|
|
|
|
|
|
|
|
return ukey;
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
static struct udpif_key *
|
|
|
|
|
ukey_create(const struct nlattr *key, size_t key_len, long long int used)
|
|
|
|
|
{
|
|
|
|
|
struct udpif_key *ukey = xmalloc(sizeof *ukey);
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_init(&ukey->mutex);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
|
|
|
|
ukey->key = (struct nlattr *) &ukey->key_buf;
|
|
|
|
|
memcpy(&ukey->key_buf, key, key_len);
|
|
|
|
|
ukey->key_len = key_len;
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_lock(&ukey->mutex);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
ukey->mark = false;
|
revalidator: Prevent handling the same flow twice.
When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.
This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.
The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."
This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.
We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-23 15:31:17 +12:00
|
|
|
|
ukey->flow_exists = true;
|
2014-02-11 13:55:34 -08:00
|
|
|
|
ukey->created = used ? used : time_msec();
|
|
|
|
|
memset(&ukey->stats, 0, sizeof ukey->stats);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
ukey->xcache = NULL;
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_unlock(&ukey->mutex);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
|
|
|
|
return ukey;
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* Checks for a ukey in 'udpif->ukeys' with the same 'ukey->key' and 'hash',
|
|
|
|
|
* and inserts 'ukey' if it does not exist.
|
|
|
|
|
*
|
|
|
|
|
* Returns true if 'ukey' was inserted into 'udpif->ukeys', false otherwise. */
|
|
|
|
|
static bool
|
|
|
|
|
udpif_insert_ukey(struct udpif *udpif, struct udpif_key *ukey, uint32_t hash)
|
|
|
|
|
{
|
|
|
|
|
struct udpif_key *duplicate;
|
|
|
|
|
uint32_t idx = hash % udpif->n_revalidators;
|
|
|
|
|
bool ok;
|
|
|
|
|
|
|
|
|
|
ovs_mutex_lock(&udpif->ukeys[idx].mutex);
|
|
|
|
|
duplicate = ukey_lookup__(udpif, ukey->key, ukey->key_len, hash);
|
|
|
|
|
if (duplicate) {
|
|
|
|
|
ok = false;
|
|
|
|
|
} else {
|
|
|
|
|
hmap_insert(&udpif->ukeys[idx].hmap, &ukey->hmap_node, hash);
|
|
|
|
|
ok = true;
|
|
|
|
|
}
|
|
|
|
|
ovs_mutex_unlock(&udpif->ukeys[idx].mutex);
|
|
|
|
|
|
|
|
|
|
return ok;
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Destroys 'ukey', removing it from 'revalidator''s ukey map first if
 * 'revalidator' is nonnull.  Passing NULL for 'revalidator' is used when the
 * ukey was never successfully inserted into a map (see revalidate()).
 *
 * Thread-safety analysis is suppressed: during sweep/purge the revalidator
 * owns its map outright, so no lock is taken here. */
static void
ukey_delete(struct revalidator *revalidator, struct udpif_key *ukey)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    if (revalidator) {
        hmap_remove(revalidator->ukeys, &ukey->hmap_node);
    }
    /* xlate_cache_delete() is given ukey->xcache directly; it is presumably
     * NULL-tolerant since ukey_create() leaves xcache NULL — TODO confirm. */
    xlate_cache_delete(ukey->xcache);
    ovs_mutex_destroy(&ukey->mutex);
    free(ukey);
}
|
|
|
|
|
|
revalidator: Only revalidate high-throughput flows.
Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.
This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)
After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:
* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.
The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.
Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
Calculate the mean time between packets for comparison, rather than
comparing the number of packets since the last dump.
RFC: First post.
2014-03-04 09:36:37 -08:00
|
|
|
|
/* Decides whether a flow is worth revalidating, based on its recent
 * throughput.  Returns true if the flow should go through full revalidation,
 * false if it is cheaper to simply delete it.
 *
 * 'packets' is the number of packets attributed to the flow since the last
 * dump; 'used' is the flow's last-used timestamp in milliseconds. */
static bool
should_revalidate(uint64_t packets, long long int used)
{
    /* Flows averaging more than this many milliseconds between packets are
     * deleted rather than revalidated. */
    enum { MAX_IDLE_MSEC_PER_PACKET = 200 };
    long long int metric, now, duration;

    /* Calculate the mean time between seeing these packets. If this
     * exceeds the threshold, then delete the flow rather than performing
     * costly revalidation for flows that aren't being hit frequently.
     *
     * This is targeted at situations where the dump_duration is high (~1s),
     * and revalidation is triggered by a call to udpif_revalidate(). In
     * these situations, revalidation of all flows causes fluctuations in the
     * flow_limit due to the interaction with the dump_duration and max_idle.
     * This tends to result in deletion of low-throughput flows anyway, so
     * skip the revalidation and just delete those flows. */
    packets = MAX(packets, 1);          /* Avoid division by zero. */
    now = MAX(used, time_msec());       /* Clamp so 'duration' is >= 0. */
    duration = now - used;
    metric = duration / packets;

    return metric <= MAX_IDLE_MSEC_PER_PACKET;
}
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
/* Verifies that the datapath flow described by 'ukey' (with datapath mask
 * 'mask'/'mask_len', installed actions 'actions'/'actions_len', and current
 * datapath statistics 'stats') is still correct, pushing any new statistics
 * up to ofproto in the process.
 *
 * Returns true if the flow may remain in the datapath, false if it should be
 * deleted (including when the flow is stale, fails re-translation, or its
 * actions/mask no longer match what translation produces). */
static bool
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct nlattr *mask, size_t mask_len,
                const struct nlattr *actions, size_t actions_len,
                const struct dpif_flow_stats *stats)
{
    uint64_t slow_path_buf[128 / 8];
    struct xlate_out xout, *xoutp;
    struct netflow *netflow;
    struct ofproto_dpif *ofproto;
    struct dpif_flow_stats push;
    struct ofpbuf xout_actions;
    struct flow flow, dp_mask;
    uint32_t *dp32, *xout32;
    odp_port_t odp_in_port;
    struct xlate_in xin;
    long long int last_used;
    int error;
    size_t i;
    bool may_learn, ok;

    ok = false;
    xoutp = NULL;
    netflow = NULL;

    /* 'ukey->mutex' is held until the 'exit' label; every early-out below
     * funnels through it so the unlock is never skipped. */
    ovs_mutex_lock(&ukey->mutex);
    last_used = ukey->stats.used;
    push.used = stats->used;
    push.tcp_flags = stats->tcp_flags;
    /* Push only the delta since the stats last cached in the ukey; guard
     * against the datapath counters appearing to go backwards. */
    push.n_packets = stats->n_packets > ukey->stats.n_packets
                     ? stats->n_packets - ukey->stats.n_packets
                     : 0;
    push.n_bytes = stats->n_bytes > ukey->stats.n_bytes
                   ? stats->n_bytes - ukey->stats.n_bytes
                   : 0;

    if (!ukey->flow_exists) {
        /* Don't bother revalidating if the flow was already deleted. */
        goto exit;
    }

    /* Low-throughput flows are deleted instead of revalidated when a full
     * revalidation pass is pending (see should_revalidate()). */
    if (udpif->need_revalidate && last_used
        && !should_revalidate(push.n_packets, last_used)) {
        ok = false;
        goto exit;
    }

    /* We will push the stats, so update the ukey stats cache. */
    ukey->stats = *stats;
    if (!push.n_packets && !udpif->need_revalidate) {
        /* Nothing new to account for and no revalidation requested. */
        ok = true;
        goto exit;
    }

    may_learn = push.n_packets > 0;
    if (ukey->xcache && !udpif->need_revalidate) {
        /* Fast path: replay the cached translation side effects instead of
         * re-translating the flow. */
        xlate_push_stats(ukey->xcache, may_learn, &push);
        ok = true;
        goto exit;
    }

    error = xlate_receive(udpif->backer, NULL, ukey->key, ukey->key_len, &flow,
                          &ofproto, NULL, NULL, &netflow, &odp_in_port);
    if (error) {
        goto exit;
    }

    if (udpif->need_revalidate) {
        /* The cached side effects may be stale; rebuild them below. */
        xlate_cache_clear(ukey->xcache);
    }
    if (!ukey->xcache) {
        ukey->xcache = xlate_cache_new();
    }

    xlate_in_init(&xin, ofproto, &flow, NULL, push.tcp_flags, NULL);
    xin.resubmit_stats = push.n_packets ? &push : NULL;
    xin.xcache = ukey->xcache;
    xin.may_learn = may_learn;
    /* Wildcard computation is only needed when comparing masks below. */
    xin.skip_wildcards = !udpif->need_revalidate;
    xlate_actions(&xin, &xout);
    xoutp = &xout;

    if (!udpif->need_revalidate) {
        /* Stats were pushed via the translation above; no mask/action
         * comparison is required. */
        ok = true;
        goto exit;
    }

    if (!xout.slow) {
        ofpbuf_use_const(&xout_actions, ofpbuf_data(&xout.odp_actions),
                         ofpbuf_size(&xout.odp_actions));
    } else {
        /* Slow-path flows are compared against the composed slow-path
         * actions rather than the full translated actions. */
        ofpbuf_use_stack(&xout_actions, slow_path_buf, sizeof slow_path_buf);
        compose_slow_path(udpif, &xout, &flow, odp_in_port, &xout_actions);
    }

    if (actions_len != ofpbuf_size(&xout_actions)
        || memcmp(ofpbuf_data(&xout_actions), actions, actions_len)) {
        /* Installed actions no longer match the translation. */
        goto exit;
    }

    if (odp_flow_key_to_mask(mask, mask_len, &dp_mask, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    /* Since the kernel is free to ignore wildcarded bits in the mask, we can't
     * directly check that the masks are the same.  Instead we check that the
     * mask in the kernel is more specific i.e. less wildcarded, than what
     * we've calculated here.  This guarantees we don't catch any packets we
     * shouldn't with the megaflow. */
    dp32 = (uint32_t *) &dp_mask;
    xout32 = (uint32_t *) &xout.wc.masks;
    for (i = 0; i < FLOW_U32S; i++) {
        if ((dp32[i] | xout32[i]) != dp32[i]) {
            goto exit;
        }
    }
    ok = true;

exit:
    ovs_mutex_unlock(&ukey->mutex);
    if (netflow) {
        if (!ok) {
            /* Flow is going away; expire its NetFlow accounting now. */
            netflow_expire(netflow, &flow);
            netflow_flow_clear(netflow, &flow);
        }
        netflow_unref(netflow);
    }
    xlate_out_uninit(xoutp);
    return ok;
}
|
|
|
|
|
|
2014-02-11 13:55:34 -08:00
|
|
|
|
/* A deferred datapath flow deletion, batched so that several deletions can be
 * issued through a single dpif_operate() call (see push_dump_ops__()). */
struct dump_op {
    struct udpif_key *ukey;          /* Ukey tracking the flow, or NULL. */
    struct dpif_flow_stats stats;    /* Stats for 'op'. */
    struct dpif_op op;               /* Flow del operation. */
};
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
static void
|
2014-02-11 13:55:34 -08:00
|
|
|
|
dump_op_init(struct dump_op *op, const struct nlattr *key, size_t key_len,
|
2014-04-10 07:14:08 +00:00
|
|
|
|
struct udpif_key *ukey)
|
2014-02-11 13:55:34 -08:00
|
|
|
|
{
|
|
|
|
|
op->ukey = ukey;
|
|
|
|
|
op->op.type = DPIF_OP_FLOW_DEL;
|
|
|
|
|
op->op.u.flow_del.key = key;
|
|
|
|
|
op->op.u.flow_del.key_len = key_len;
|
|
|
|
|
op->op.u.flow_del.stats = &op->stats;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Executes the 'n_ops' flow deletions in 'ops' as one batched dpif_operate()
 * call, then credits each deleted flow's final statistics back to ofproto
 * (via the ukey's xlate cache when available, otherwise by re-translating
 * the flow for side effects) and expires its NetFlow state. */
static void
push_dump_ops__(struct udpif *udpif, struct dump_op *ops, size_t n_ops)
{
    struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
    size_t i;

    ovs_assert(n_ops <= REVALIDATE_MAX_BATCH);
    for (i = 0; i < n_ops; i++) {
        opsp[i] = &ops[i].op;
    }
    dpif_operate(udpif->dpif, opsp, n_ops);

    for (i = 0; i < n_ops; i++) {
        struct dump_op *op = &ops[i];
        struct dpif_flow_stats *push, *stats, push_buf;

        /* 'stats' now holds the final counters the datapath reported for the
         * deleted flow. */
        stats = op->op.u.flow_del.stats;
        if (op->ukey) {
            /* Push only the delta beyond what was already credited through
             * the ukey; take the ukey mutex while reading its cached stats. */
            push = &push_buf;
            ovs_mutex_lock(&op->ukey->mutex);
            push->used = MAX(stats->used, op->ukey->stats.used);
            push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
            push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
            push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;
            ovs_mutex_unlock(&op->ukey->mutex);
        } else {
            /* No ukey: credit the full final stats. */
            push = stats;
        }

        if (push->n_packets || netflow_exists()) {
            struct ofproto_dpif *ofproto;
            struct netflow *netflow;
            struct flow flow;
            bool may_learn;

            may_learn = push->n_packets > 0;
            if (op->ukey) {
                ovs_mutex_lock(&op->ukey->mutex);
                if (op->ukey->xcache) {
                    /* Fast path: replay cached translation side effects
                     * instead of re-translating the flow. */
                    xlate_push_stats(op->ukey->xcache, may_learn, push);
                    ovs_mutex_unlock(&op->ukey->mutex);
                    continue;
                }
                ovs_mutex_unlock(&op->ukey->mutex);
            }

            /* Slow path: recover the flow from its datapath key and run the
             * translation purely for its side effects (stats, learning,
             * NetFlow). */
            if (!xlate_receive(udpif->backer, NULL, op->op.u.flow_del.key,
                               op->op.u.flow_del.key_len, &flow, &ofproto,
                               NULL, NULL, &netflow, NULL)) {
                struct xlate_in xin;

                xlate_in_init(&xin, ofproto, &flow, NULL, push->tcp_flags,
                              NULL);
                xin.resubmit_stats = push->n_packets ? push : NULL;
                xin.may_learn = may_learn;
                xin.skip_wildcards = true;
                xlate_actions_for_side_effects(&xin);

                if (netflow) {
                    /* The flow is gone from the datapath; close out its
                     * NetFlow accounting. */
                    netflow_expire(netflow, &flow);
                    netflow_flow_clear(netflow, &flow);
                    netflow_unref(netflow);
                }
            }
        }
    }
}
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
static void
|
|
|
|
|
push_dump_ops(struct revalidator *revalidator,
|
|
|
|
|
struct dump_op *ops, size_t n_ops)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
2014-02-11 13:55:34 -08:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
push_dump_ops__(revalidator->udpif, ops, n_ops);
|
|
|
|
|
for (i = 0; i < n_ops; i++) {
|
|
|
|
|
ukey_delete(revalidator, ops[i].ukey);
|
2014-02-11 13:55:34 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Main revalidator loop body: walks this revalidator's share of the datapath
 * flow dump, deciding for each flow whether to keep it (revalidating it and
 * pushing its stats) or to delete it.  Deletions are batched into 'ops' and
 * flushed via push_dump_ops__() whenever the batch fills or the dump's
 * buffers are about to be recycled. */
static void
revalidate(struct revalidator *revalidator)
{
    struct udpif *udpif = revalidator->udpif;

    struct dump_op ops[REVALIDATE_MAX_BATCH];
    const struct nlattr *key, *mask, *actions;
    size_t key_len, mask_len, actions_len;
    const struct dpif_flow_stats *stats;
    long long int now;
    unsigned int flow_limit;
    size_t n_ops;
    void *state;

    n_ops = 0;
    now = time_msec();
    atomic_read(&udpif->flow_limit, &flow_limit);

    dpif_flow_dump_state_init(udpif->dpif, &state);
    while (dpif_flow_dump_next(&udpif->dump, state, &key, &key_len, &mask,
                               &mask_len, &actions, &actions_len, &stats)) {
        struct udpif_key *ukey;
        bool mark, may_destroy;
        long long int used, max_idle;
        uint32_t hash;
        size_t n_flows;

        hash = hash_bytes(key, key_len, udpif->secret);
        ukey = ukey_lookup(udpif, key, key_len, hash);

        used = stats->used;
        if (!used && ukey) {
            ovs_mutex_lock(&ukey->mutex);

            if (ukey->mark || !ukey->flow_exists) {
                /* The flow has already been dumped. This can occasionally
                 * occur if the datapath is changed in the middle of a flow
                 * dump. Rather than perform the same work twice, skip the
                 * flow this time. */
                ovs_mutex_unlock(&ukey->mutex);
                COVERAGE_INC(upcall_duplicate_flow);
                goto next;
            }

            /* The datapath reports no last-used time; fall back to the
             * ukey's creation time for the idle check below. */
            used = ukey->created;
            ovs_mutex_unlock(&ukey->mutex);
        }

        n_flows = udpif_get_n_flows(udpif);
        max_idle = ofproto_max_idle;
        if (n_flows > flow_limit) {
            /* Over the flow limit: shrink the idle window so excess flows
             * age out quickly. */
            max_idle = 100;
        }

        if ((used && used < now - max_idle) || n_flows > flow_limit * 2) {
            /* Flow is idle past the deadline, or the datapath is badly over
             * its limit: schedule deletion without revalidating. */
            mark = false;
        } else {
            if (!ukey) {
                ukey = ukey_create(key, key_len, used);
                if (!udpif_insert_ukey(udpif, ukey, hash)) {
                    /* The same ukey has already been created. This means that
                     * another revalidator is processing this flow
                     * concurrently, so don't bother processing it. */
                    ukey_delete(NULL, ukey);
                    goto next;
                }
            }

            mark = revalidate_ukey(udpif, ukey, mask, mask_len, actions,
                                   actions_len, stats);
        }

        if (ukey) {
            /* Record the verdict: 'mark' feeds revalidator_sweep(), and
             * 'flow_exists' guards against handling a re-dumped flow. */
            ovs_mutex_lock(&ukey->mutex);
            ukey->mark = ukey->flow_exists = mark;
            ovs_mutex_unlock(&ukey->mutex);
        }

        if (!mark) {
            dump_op_init(&ops[n_ops++], key, key_len, ukey);
        }

    next:
        may_destroy = dpif_flow_dump_next_may_destroy_keys(&udpif->dump,
                                                           state);

        /* Only update 'now' immediately before 'buffer' will be updated.
         * This gives us the current time relative to the time the datapath
         * will write into 'stats'. */
        if (may_destroy) {
            now = time_msec();
        }

        /* Only do a dpif_operate when we've hit our maximum batch, or when our
         * memory is about to be clobbered by the next call to
         * dpif_flow_dump_next(). */
        if (n_ops == REVALIDATE_MAX_BATCH || (n_ops && may_destroy)) {
            push_dump_ops__(udpif, ops, n_ops);
            n_ops = 0;
        }
    }

    /* Flush any deletions left over from the final, partial batch. */
    if (n_ops) {
        push_dump_ops__(udpif, ops, n_ops);
    }

    dpif_flow_dump_state_uninit(udpif->dpif, state);
}
|
|
|
|
|
|
|
|
|
|
static void
|
2014-02-11 13:55:36 -08:00
|
|
|
|
revalidator_sweep__(struct revalidator *revalidator, bool purge)
|
2014-04-10 07:14:08 +00:00
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
2013-09-24 13:39:56 -07:00
|
|
|
|
{
|
2014-02-11 13:55:35 -08:00
|
|
|
|
struct dump_op ops[REVALIDATE_MAX_BATCH];
|
2013-09-24 13:39:56 -07:00
|
|
|
|
struct udpif_key *ukey, *next;
|
2014-02-11 13:55:35 -08:00
|
|
|
|
size_t n_ops;
|
|
|
|
|
|
|
|
|
|
n_ops = 0;
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
/* During garbage collection, this revalidator completely owns its ukeys
|
|
|
|
|
* map, and therefore doesn't need to do any locking. */
|
|
|
|
|
HMAP_FOR_EACH_SAFE (ukey, next, hmap_node, revalidator->ukeys) {
|
2014-02-11 13:55:36 -08:00
|
|
|
|
if (!purge && ukey->mark) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
ukey->mark = false;
|
revalidator: Prevent handling the same flow twice.
When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.
This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.
The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."
This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.
We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-23 15:31:17 +12:00
|
|
|
|
} else if (!ukey->flow_exists) {
|
|
|
|
|
ukey_delete(revalidator, ukey);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
} else {
|
2014-02-11 13:55:35 -08:00
|
|
|
|
struct dump_op *op = &ops[n_ops++];
|
|
|
|
|
|
|
|
|
|
/* If we have previously seen a flow in the datapath, but didn't
|
|
|
|
|
* see it during the most recent dump, delete it. This allows us
|
|
|
|
|
* to clean up the ukey and keep the statistics consistent. */
|
2014-04-10 07:14:08 +00:00
|
|
|
|
dump_op_init(op, ukey->key, ukey->key_len, ukey);
|
2014-02-11 13:55:35 -08:00
|
|
|
|
if (n_ops == REVALIDATE_MAX_BATCH) {
|
|
|
|
|
push_dump_ops(revalidator, ops, n_ops);
|
|
|
|
|
n_ops = 0;
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-02-11 13:55:35 -08:00
|
|
|
|
|
|
|
|
|
if (n_ops) {
|
|
|
|
|
push_dump_ops(revalidator, ops, n_ops);
|
|
|
|
|
}
|
2013-06-25 14:45:43 -07:00
|
|
|
|
}
|
2014-02-11 13:55:36 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
revalidator_sweep(struct revalidator *revalidator)
|
|
|
|
|
{
|
|
|
|
|
revalidator_sweep__(revalidator, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
revalidator_purge(struct revalidator *revalidator)
|
|
|
|
|
{
|
|
|
|
|
revalidator_sweep__(revalidator, true);
|
|
|
|
|
}
|
2013-11-20 18:06:12 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unsigned int flow_limit;
|
2013-11-20 18:06:12 -08:00
|
|
|
|
size_t i;
|
|
|
|
|
|
2013-09-24 13:39:56 -07:00
|
|
|
|
atomic_read(&udpif->flow_limit, &flow_limit);
|
|
|
|
|
|
2013-11-20 18:06:12 -08:00
|
|
|
|
ds_put_format(&ds, "%s:\n", dpif_name(udpif->dpif));
|
2014-05-14 16:19:34 +09:00
|
|
|
|
ds_put_format(&ds, "\tflows : (current %lu)"
|
2013-09-24 13:39:56 -07:00
|
|
|
|
" (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
|
|
|
|
|
udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
|
|
|
|
|
ds_put_format(&ds, "\tdump duration : %lldms\n", udpif->dump_duration);
|
|
|
|
|
|
|
|
|
|
ds_put_char(&ds, '\n');
|
|
|
|
|
for (i = 0; i < n_revalidators; i++) {
|
|
|
|
|
struct revalidator *revalidator = &udpif->revalidators[i];
|
|
|
|
|
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_lock(&udpif->ukeys[i].mutex);
|
ovs-thread: Make caller provide thread name when creating a thread.
Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one. This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible. This also simplifies code, since two function calls become
only one.
This makes a few other changes to the thread creation function:
* Since it is no longer a direct wrapper around a pthread function,
rename it to avoid giving that impression.
* Remove 'pthread_attr_t *' param that every caller supplied as NULL.
* Change 'pthread *' parameter into a return value, for convenience.
The system-stats code hadn't set a thread name, so this fixes that issue.
This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself. Setting the thread
name earlier, as in this patch, avoids the problem.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
2014-04-25 17:46:21 -07:00
|
|
|
|
ds_put_format(&ds, "\t%u: (keys %"PRIuSIZE")\n",
|
|
|
|
|
revalidator->id, hmap_count(&udpif->ukeys[i].hmap));
|
2014-04-10 07:14:08 +00:00
|
|
|
|
ovs_mutex_unlock(&udpif->ukeys[i].mutex);
|
2013-09-24 13:39:56 -07:00
|
|
|
|
}
|
2013-11-20 18:06:12 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unixctl_command_reply(conn, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
2013-09-24 13:39:56 -07:00
|
|
|
|
|
|
|
|
|
/* Disable using the megaflows.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
atomic_store(&enable_megaflows, false);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush_all_datapaths();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unixctl_command_reply(conn, "megaflows disabled");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Re-enable using megaflows.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
atomic_store(&enable_megaflows, true);
|
udpif: Bug fix updif_flush
Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidaor. Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.
Double deletion by itself is not problem, per se, may an efficiency
issue. However, for ever flow_del message sent to the datapath, a log
message, at the warning level, will be generated in case datapath
failed to execute the command. In addition to cause spurious log
messages, Double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.
The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapth before restarting the revalidator threads.
dpif_flush() was implemented as flush flows of all datapaths while
most of its invocation should only flush its local datapath.
Only megaflow on/off commands should flush all dapapaths. This bug is
also fixed.
Found during development.
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2014-03-13 21:48:55 -07:00
|
|
|
|
udpif_flush_all_datapaths();
|
2013-09-24 13:39:56 -07:00
|
|
|
|
unixctl_command_reply(conn, "megaflows enabled");
|
|
|
|
|
}
|
2014-02-06 09:49:19 -08:00
|
|
|
|
|
|
|
|
|
/* Set the flow limit.
|
|
|
|
|
*
|
|
|
|
|
* This command is only needed for advanced debugging, so it's not
|
|
|
|
|
* documented in the man page. */
|
|
|
|
|
static void
|
|
|
|
|
upcall_unixctl_set_flow_limit(struct unixctl_conn *conn,
|
|
|
|
|
int argc OVS_UNUSED,
|
|
|
|
|
const char *argv[] OVS_UNUSED,
|
|
|
|
|
void *aux OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct udpif *udpif;
|
|
|
|
|
unsigned int flow_limit = atoi(argv[1]);
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
|
|
|
|
|
atomic_store(&udpif->flow_limit, flow_limit);
|
|
|
|
|
}
|
|
|
|
|
ds_put_format(&ds, "set flow_limit to %u\n", flow_limit);
|
|
|
|
|
unixctl_command_reply(conn, ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|