2009-07-08 13:19:16 -07:00
|
|
|
|
/*
|
2013-06-24 10:54:49 -07:00
|
|
|
|
* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
|
2009-07-08 13:19:16 -07:00
|
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
2009-07-08 13:19:16 -07:00
|
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
2009-07-08 13:19:16 -07:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
2009-06-17 14:35:35 -07:00
|
|
|
|
#include "dpif-provider.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
#include "coverage.h"
|
|
|
|
|
#include "dynamic-string.h"
|
|
|
|
|
#include "flow.h"
|
2010-12-03 14:41:38 -08:00
|
|
|
|
#include "netdev.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
#include "netlink.h"
|
2013-10-09 17:28:05 -07:00
|
|
|
|
#include "odp-execute.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
#include "odp-util.h"
|
2012-01-12 15:48:19 -08:00
|
|
|
|
#include "ofp-errors.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
#include "ofp-print.h"
|
2010-10-19 09:55:40 -07:00
|
|
|
|
#include "ofp-util.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
#include "ofpbuf.h"
|
|
|
|
|
#include "packets.h"
|
|
|
|
|
#include "poll-loop.h"
|
2010-02-01 11:36:01 -05:00
|
|
|
|
#include "shash.h"
|
2011-03-25 13:00:13 -07:00
|
|
|
|
#include "sset.h"
|
2011-01-26 07:11:50 -08:00
|
|
|
|
#include "timeval.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
#include "util.h"
|
|
|
|
|
#include "valgrind.h"
|
|
|
|
|
#include "vlog.h"
|
2010-07-16 11:02:49 -07:00
|
|
|
|
|
2010-10-19 14:47:01 -07:00
|
|
|
|
VLOG_DEFINE_THIS_MODULE(dpif);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
coverage: Make the coverage counters catalog program-specific.
Until now, the collection of coverage counters supported by a given OVS
program was not specific to that program. That means that, for example,
even though ovs-dpctl does not have anything to do with mac_learning, it
still has a coverage counter for it. This is confusing, at best.
This commit fixes the problem on some systems, in particular on ones that
use GCC and the GNU linker. It uses the feature of the GNU linker
described in its manual as:
If an orphaned section's name is representable as a C identifier then
the linker will automatically see PROVIDE two symbols: __start_SECNAME
and __end_SECNAME, where SECNAME is the name of the section. These
indicate the start address and end address of the orphaned section
respectively.
Systems that don't support these features retain the earlier behavior.
This commit also fixes the annoyance that files that include coverage
counters must be listed on COVERAGE_FILES in lib/automake.mk.
This commit also fixes the annoyance that modifying any source file that
includes a coverage counter caused all programs that link against
libopenvswitch.a to relink, even programs that the source file was not
linked into. For example, modifying ofproto/ofproto.c (which includes
coverage counters) caused tests/test-aes128 to relink, even though
test-aes128 does not link against ofproto.o.
2010-11-01 14:14:27 -07:00
|
|
|
|
/* Coverage counters defined by this file, one per counted dpif operation. */
COVERAGE_DEFINE(dpif_destroy);
COVERAGE_DEFINE(dpif_port_add);
COVERAGE_DEFINE(dpif_port_del);
COVERAGE_DEFINE(dpif_flow_flush);
COVERAGE_DEFINE(dpif_flow_get);
COVERAGE_DEFINE(dpif_flow_put);
COVERAGE_DEFINE(dpif_flow_del);
COVERAGE_DEFINE(dpif_execute);
COVERAGE_DEFINE(dpif_purge);
COVERAGE_DEFINE(dpif_execute_with_help);
|
coverage: Make the coverage counters catalog program-specific.
Until now, the collection of coverage counters supported by a given OVS
program was not specific to that program. That means that, for example,
even though ovs-dpctl does not have anything to do with mac_learning, it
still has a coverage counter for it. This is confusing, at best.
This commit fixes the problem on some systems, in particular on ones that
use GCC and the GNU linker. It uses the feature of the GNU linker
described in its manual as:
If an orphaned section's name is representable as a C identifier then
the linker will automatically see PROVIDE two symbols: __start_SECNAME
and __end_SECNAME, where SECNAME is the name of the section. These
indicate the start address and end address of the orphaned section
respectively.
Systems that don't support these features retain the earlier behavior.
This commit also fixes the annoyance that files that include coverage
counters must be listed on COVERAGE_FILES in lib/automake.mk.
This commit also fixes the annoyance that modifying any source file that
includes a coverage counter caused all programs that link against
libopenvswitch.a to relink, even programs that the source file was not
linked into. For example, modifying ofproto/ofproto.c (which includes
coverage counters) caused tests/test-aes128 to relink, even though
test-aes128 does not link against ofproto.o.
2010-11-01 14:14:27 -07:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
static const struct dpif_class *base_dpif_classes[] = {
|
2012-10-05 13:24:21 -07:00
|
|
|
|
#ifdef LINUX_DATAPATH
|
2009-06-17 14:35:35 -07:00
|
|
|
|
&dpif_linux_class,
|
2010-05-26 10:38:52 -07:00
|
|
|
|
#endif
|
2009-06-19 14:09:39 -07:00
|
|
|
|
&dpif_netdev_class,
|
2009-06-16 10:09:10 -07:00
|
|
|
|
};
|
2010-02-01 11:36:01 -05:00
|
|
|
|
|
|
|
|
|
/* One entry in the 'dpif_classes' registry. */
struct registered_dpif_class {
    const struct dpif_class *dpif_class;    /* The registered provider. */
    int refcount;                           /* Incremented by
                                             * dp_class_lookup(), decremented
                                             * by dp_class_unref(). */
};
|
|
|
|
|
static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
|
2011-11-17 18:06:55 -08:00
|
|
|
|
static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist);
|
2009-06-16 10:09:10 -07:00
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
/* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */
|
2013-07-30 15:31:48 -07:00
|
|
|
|
static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER;
|
2013-07-25 10:31:42 -07:00
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
|
/* Rate limit for individual messages going to or from the datapath, output at
|
|
|
|
|
* DBG level. This is very high because, if these are enabled, it is because
|
|
|
|
|
* we really need to see them. */
|
|
|
|
|
static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
|
|
|
|
|
|
|
|
|
|
/* Not really much point in logging many dpif errors. */
|
2010-04-06 11:17:39 -07:00
|
|
|
|
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
/* Forward declarations of this file's static logging helpers. */
static void log_flow_message(const struct dpif *dpif, int error,
                             const char *operation,
                             const struct nlattr *key, size_t key_len,
                             const struct nlattr *mask, size_t mask_len,
                             const struct dpif_flow_stats *stats,
                             const struct nlattr *actions,
                             size_t actions_len);
static void log_operation(const struct dpif *, const char *operation,
                          int error);
static bool should_log_flow_message(int error);
static void log_flow_put_message(struct dpif *,
                                 const struct dpif_flow_put *, int error);
static void log_flow_del_message(struct dpif *,
                                 const struct dpif_flow_del *, int error);
static void log_execute_message(struct dpif *,
                                const struct dpif_execute *, int error);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
static void
|
|
|
|
|
dp_initialize(void)
|
|
|
|
|
{
|
2013-04-23 14:35:29 -07:00
|
|
|
|
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
|
2013-04-23 14:35:29 -07:00
|
|
|
|
if (ovsthread_once_start(&once)) {
|
2010-02-01 11:36:01 -05:00
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
|
|
|
|
|
dp_register_provider(base_dpif_classes[i]);
|
|
|
|
|
}
|
2013-04-23 14:35:29 -07:00
|
|
|
|
ovsthread_once_done(&once);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
static int
|
|
|
|
|
dp_register_provider__(const struct dpif_class *new_class)
|
2010-02-01 11:36:01 -05:00
|
|
|
|
{
|
|
|
|
|
struct registered_dpif_class *registered_class;
|
|
|
|
|
|
2011-11-17 18:06:55 -08:00
|
|
|
|
if (sset_contains(&dpif_blacklist, new_class->type)) {
|
|
|
|
|
VLOG_DBG("attempted to register blacklisted provider: %s",
|
|
|
|
|
new_class->type);
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
if (shash_find(&dpif_classes, new_class->type)) {
|
|
|
|
|
VLOG_WARN("attempted to register duplicate datapath provider: %s",
|
|
|
|
|
new_class->type);
|
|
|
|
|
return EEXIST;
|
|
|
|
|
}
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
registered_class = xmalloc(sizeof *registered_class);
|
2010-11-18 10:07:48 -08:00
|
|
|
|
registered_class->dpif_class = new_class;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
registered_class->refcount = 0;
|
|
|
|
|
|
|
|
|
|
shash_add(&dpif_classes, new_class->type, registered_class);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
/* Registers a new datapath provider. After successful registration, new
|
|
|
|
|
* datapaths of that type can be opened using dpif_open(). */
|
|
|
|
|
int
|
|
|
|
|
dp_register_provider(const struct dpif_class *new_class)
|
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
error = dp_register_provider__(new_class);
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
/* Unregisters a datapath provider. 'type' must have been previously
|
|
|
|
|
* registered and not currently be in use by any dpifs. After unregistration
|
|
|
|
|
* new datapaths of that type cannot be opened using dpif_open(). */
|
2013-07-25 10:31:42 -07:00
|
|
|
|
static int
|
|
|
|
|
dp_unregister_provider__(const char *type)
|
2010-02-01 11:36:01 -05:00
|
|
|
|
{
|
|
|
|
|
struct shash_node *node;
|
|
|
|
|
struct registered_dpif_class *registered_class;
|
|
|
|
|
|
|
|
|
|
node = shash_find(&dpif_classes, type);
|
|
|
|
|
if (!node) {
|
|
|
|
|
VLOG_WARN("attempted to unregister a datapath provider that is not "
|
|
|
|
|
"registered: %s", type);
|
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
registered_class = node->data;
|
|
|
|
|
if (registered_class->refcount) {
|
|
|
|
|
VLOG_WARN("attempted to unregister in use datapath provider: %s", type);
|
|
|
|
|
return EBUSY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
shash_delete(&dpif_classes, node);
|
|
|
|
|
free(registered_class);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
/* Unregisters a datapath provider. 'type' must have been previously
|
|
|
|
|
* registered and not currently be in use by any dpifs. After unregistration
|
|
|
|
|
* new datapaths of that type cannot be opened using dpif_open(). */
|
|
|
|
|
int
|
|
|
|
|
dp_unregister_provider(const char *type)
|
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
dp_initialize();
|
|
|
|
|
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
error = dp_unregister_provider__(type);
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2011-11-17 18:06:55 -08:00
|
|
|
|
/* Blacklists a provider. Causes future calls of dp_register_provider() with
|
|
|
|
|
* a dpif_class which implements 'type' to fail. */
|
|
|
|
|
void
|
|
|
|
|
dp_blacklist_provider(const char *type)
|
|
|
|
|
{
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2011-11-17 18:06:55 -08:00
|
|
|
|
sset_add(&dpif_blacklist, type);
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2011-11-17 18:06:55 -08:00
|
|
|
|
}
|
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
/* Clears 'types' and enumerates the types of all currently registered datapath
|
2011-03-25 13:00:13 -07:00
|
|
|
|
* providers into it. The caller must first initialize the sset. */
|
2010-01-22 14:37:10 -05:00
|
|
|
|
void
|
2011-03-25 13:00:13 -07:00
|
|
|
|
dp_enumerate_types(struct sset *types)
|
2010-01-22 14:37:10 -05:00
|
|
|
|
{
|
2010-02-01 11:36:01 -05:00
|
|
|
|
struct shash_node *node;
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
dp_initialize();
|
2011-03-25 13:00:13 -07:00
|
|
|
|
sset_clear(types);
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
SHASH_FOR_EACH(node, &dpif_classes) {
|
|
|
|
|
const struct registered_dpif_class *registered_class = node->data;
|
2011-03-25 13:00:13 -07:00
|
|
|
|
sset_add(types, registered_class->dpif_class->type);
|
2010-01-22 14:37:10 -05:00
|
|
|
|
}
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dp_class_unref(struct registered_dpif_class *rc)
|
|
|
|
|
{
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
ovs_assert(rc->refcount);
|
|
|
|
|
rc->refcount--;
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct registered_dpif_class *
|
|
|
|
|
dp_class_lookup(const char *type)
|
|
|
|
|
{
|
|
|
|
|
struct registered_dpif_class *rc;
|
|
|
|
|
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
rc = shash_find_data(&dpif_classes, type);
|
|
|
|
|
if (rc) {
|
|
|
|
|
rc->refcount++;
|
|
|
|
|
}
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
|
|
|
|
|
return rc;
|
2010-01-22 14:37:10 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Clears 'names' and enumerates the names of all known created datapaths with
|
2011-03-25 13:00:13 -07:00
|
|
|
|
* the given 'type'. The caller must first initialize the sset. Returns 0 if
|
2010-01-22 14:37:10 -05:00
|
|
|
|
* successful, otherwise a positive errno value.
|
2009-07-06 11:06:36 -07:00
|
|
|
|
*
|
|
|
|
|
* Some kinds of datapaths might not be practically enumerable. This is not
|
|
|
|
|
* considered an error. */
|
|
|
|
|
int
|
2011-03-25 13:00:13 -07:00
|
|
|
|
dp_enumerate_names(const char *type, struct sset *names)
|
2009-07-06 11:06:36 -07:00
|
|
|
|
{
|
2013-07-25 10:31:42 -07:00
|
|
|
|
struct registered_dpif_class *registered_class;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
const struct dpif_class *dpif_class;
|
|
|
|
|
int error;
|
2009-07-06 11:06:36 -07:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
dp_initialize();
|
2011-03-25 13:00:13 -07:00
|
|
|
|
sset_clear(names);
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
registered_class = dp_class_lookup(type);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
if (!registered_class) {
|
|
|
|
|
VLOG_WARN("could not enumerate unknown type: %s", type);
|
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
|
}
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2010-11-18 10:07:48 -08:00
|
|
|
|
dpif_class = registered_class->dpif_class;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0;
|
|
|
|
|
if (error) {
|
|
|
|
|
VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type,
|
2013-06-24 10:54:49 -07:00
|
|
|
|
ovs_strerror(error));
|
2009-07-06 11:06:36 -07:00
|
|
|
|
}
|
2013-07-25 10:31:42 -07:00
|
|
|
|
dp_class_unref(registered_class);
|
2010-01-22 14:37:10 -05:00
|
|
|
|
|
2010-02-01 11:36:01 -05:00
|
|
|
|
return error;
|
2010-01-22 14:37:10 -05:00
|
|
|
|
}
|
|
|
|
|
|
2011-04-11 15:07:07 -07:00
|
|
|
|
/* Parses 'datapath_name_', which is of the form [type@]name, into its
 * component pieces.  On return '*name' and '*type' each point to a separately
 * allocated string that the caller must free.
 *
 * The returned 'type' is normalized, as if by dpif_normalize_type(): a missing
 * or empty type yields "system".  The returned 'name' is the text after the
 * first '@' (or the whole string if there is no '@'), copied verbatim. */
void
dp_parse_name(const char *datapath_name_, char **name, char **type)
{
    char *datapath_name = xstrdup(datapath_name_);
    char *separator = strchr(datapath_name, '@');

    if (separator) {
        /* Split in place: 'datapath_name' now holds only the type part. */
        *separator = '\0';

        /* Normalize the type half, not the name half, so that an empty type
         * such as in "@dp0" becomes "system" and the name is never rewritten
         * to a type string. */
        *type = xstrdup(dpif_normalize_type(datapath_name));
        *name = xstrdup(separator + 1);
        free(datapath_name);
    } else {
        *name = datapath_name;
        *type = xstrdup(dpif_normalize_type(NULL));
    }
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
static int
|
2010-01-22 14:37:10 -05:00
|
|
|
|
do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
struct dpif *dpif = NULL;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int error;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
struct registered_dpif_class *registered_class;
|
|
|
|
|
|
|
|
|
|
dp_initialize();
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
2011-04-05 12:52:58 -07:00
|
|
|
|
type = dpif_normalize_type(type);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
registered_class = dp_class_lookup(type);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
if (!registered_class) {
|
|
|
|
|
VLOG_WARN("could not create datapath %s of unknown type %s", name,
|
|
|
|
|
type);
|
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
|
goto exit;
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-18 10:06:41 -08:00
|
|
|
|
error = registered_class->dpif_class->open(registered_class->dpif_class,
|
|
|
|
|
name, create, &dpif);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
if (!error) {
|
2012-11-06 13:14:55 -08:00
|
|
|
|
ovs_assert(dpif->dpif_class == registered_class->dpif_class);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
} else {
|
|
|
|
|
dp_class_unref(registered_class);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
exit:
|
|
|
|
|
*dpifp = error ? NULL : dpif;
|
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
|
/* Opens the existing datapath called 'name' of type 'type', without creating
 * it.  Pass NULL or "" as 'type' to select the default system type.
 *
 * Returns 0 and stores the datapath in '*dpifp' on success.  Otherwise (for
 * example, if no such datapath exists) returns a positive errno value and
 * stores a null pointer in '*dpifp'. */
int
dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = false;

    return do_open(name, type, create, dpifp);
}
|
|
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
|
/* Creates and opens a new datapath called 'name' of type 'type'.  Pass NULL
 * or "" as 'type' to select the default system type.  Fails if a datapath
 * with that name and type already exists.
 *
 * Returns 0 and stores the datapath in '*dpifp' on success; otherwise returns
 * a positive errno value and stores a null pointer in '*dpifp'. */
int
dpif_create(const char *name, const char *type, struct dpif **dpifp)
{
    const bool create = true;

    return do_open(name, type, create, dpifp);
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
|
/* Opens the datapath called 'name' of type 'type', creating it first if it
 * does not exist yet.  Pass NULL or "" as 'type' to select the default system
 * type.
 *
 * Creation is attempted first; only if that fails with EEXIST or EBUSY is a
 * plain open attempted.  Returns 0 and stores the datapath in '*dpifp' on
 * success; otherwise logs a warning, returns a positive errno value, and
 * leaves a null pointer in '*dpifp'. */
int
dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
{
    int error = dpif_create(name, type, dpifp);

    if (!error) {
        return error;
    }

    if (error != EEXIST && error != EBUSY) {
        VLOG_WARN("failed to create datapath %s: %s",
                  name, ovs_strerror(error));
        return error;
    }

    /* The datapath is already there, so fall back to opening it. */
    error = dpif_open(name, type, dpifp);
    if (error) {
        VLOG_WARN("datapath %s already exists but cannot be opened: %s",
                  name, ovs_strerror(error));
    }
    return error;
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Closes and frees the connection to 'dpif'. Does not destroy the datapath
|
|
|
|
|
* itself; call dpif_delete() first, instead, if that is desirable. */
|
|
|
|
|
void
|
|
|
|
|
dpif_close(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
if (dpif) {
|
2013-07-25 10:31:42 -07:00
|
|
|
|
struct registered_dpif_class *rc;
|
2010-02-01 11:36:01 -05:00
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
rc = shash_find_data(&dpif_classes, dpif->dpif_class->type);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
dpif_uninit(dpif, true);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
dp_class_unref(rc);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-05-06 15:04:29 -07:00
|
|
|
|
/* Performs periodic work needed by 'dpif'. */
|
|
|
|
|
void
|
|
|
|
|
dpif_run(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
if (dpif->dpif_class->run) {
|
|
|
|
|
dpif->dpif_class->run(dpif);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Arranges for poll_block() to wake up when dp_run() needs to be called for
|
|
|
|
|
* 'dpif'. */
|
|
|
|
|
void
|
|
|
|
|
dpif_wait(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
if (dpif->dpif_class->wait) {
|
|
|
|
|
dpif->dpif_class->wait(dpif);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
|
/* Returns the name of datapath 'dpif' prefixed with the type
|
|
|
|
|
* (for use in log messages). */
|
2009-06-16 11:00:22 -07:00
|
|
|
|
const char *
|
|
|
|
|
dpif_name(const struct dpif *dpif)
|
|
|
|
|
{
|
2010-01-22 14:37:10 -05:00
|
|
|
|
return dpif->full_name;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns the name of datapath 'dpif' without the type
|
|
|
|
|
* (for use in device names). */
|
|
|
|
|
const char *
|
|
|
|
|
dpif_base_name(const struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
return dpif->base_name;
|
2009-06-16 11:00:22 -07:00
|
|
|
|
}
|
|
|
|
|
|
2012-11-01 16:04:06 -07:00
|
|
|
|
/* Returns the type of datapath 'dpif'. */
|
|
|
|
|
const char *
|
|
|
|
|
dpif_type(const struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
return dpif->dpif_class->type;
|
|
|
|
|
}
|
|
|
|
|
|
2011-04-05 12:52:58 -07:00
|
|
|
|
/* Returns the fully spelled out name for datapath 'type': "system" when
 * 'type' is NULL or the empty string, otherwise 'type' itself (the same
 * pointer, not a copy).
 *
 * Normalized type strings can be compared with strcmp().  Unnormalized type
 * strings may denote the same type even if they are spelled differently. */
const char *
dpif_normalize_type(const char *type)
{
    if (type == NULL || type[0] == '\0') {
        return "system";
    }
    return type;
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Destroys the datapath that 'dpif' is connected to, first removing all of its
|
|
|
|
|
* ports. After calling this function, it does not make sense to pass 'dpif'
|
|
|
|
|
* to any functions other than dpif_name() or dpif_close(). */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
|
|
|
|
dpif_delete(struct dpif *dpif)
|
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
|
COVERAGE_INC(dpif_destroy);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
|
error = dpif->dpif_class->destroy(dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
log_operation(dpif, "delete", error);
|
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful,
|
|
|
|
|
* otherwise a positive errno value. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2011-10-05 11:18:13 -07:00
|
|
|
|
dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
int error = dpif->dpif_class->get_stats(dpif, stats);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
memset(stats, 0, sizeof *stats);
|
|
|
|
|
}
|
|
|
|
|
log_operation(dpif, "get_stats", error);
|
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2012-11-14 15:50:20 -08:00
|
|
|
|
const char *
|
|
|
|
|
dpif_port_open_type(const char *datapath_type, const char *port_type)
|
|
|
|
|
{
|
2013-07-25 10:31:42 -07:00
|
|
|
|
struct registered_dpif_class *rc;
|
2012-11-14 15:50:20 -08:00
|
|
|
|
|
|
|
|
|
datapath_type = dpif_normalize_type(datapath_type);
|
|
|
|
|
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_lock(&dpif_mutex);
|
2013-07-25 10:31:42 -07:00
|
|
|
|
rc = shash_find_data(&dpif_classes, datapath_type);
|
|
|
|
|
if (rc && rc->dpif_class->port_open_type) {
|
|
|
|
|
port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type);
|
2012-11-14 15:50:20 -08:00
|
|
|
|
}
|
2013-07-30 15:31:48 -07:00
|
|
|
|
ovs_mutex_unlock(&dpif_mutex);
|
2012-11-14 15:50:20 -08:00
|
|
|
|
|
2013-07-25 10:31:42 -07:00
|
|
|
|
return port_type;
|
2012-11-14 15:50:20 -08:00
|
|
|
|
}
|
|
|
|
|
|
2012-07-27 23:58:24 -07:00
|
|
|
|
/* Attempts to add 'netdev' as a port on 'dpif'. If 'port_nop' is
|
2013-06-19 16:58:44 -07:00
|
|
|
|
* non-null and its value is not ODPP_NONE, then attempts to use the
|
2012-07-27 23:58:24 -07:00
|
|
|
|
* value as the port number.
|
|
|
|
|
*
|
|
|
|
|
* If successful, returns 0 and sets '*port_nop' to the new port's port
|
|
|
|
|
* number (if 'port_nop' is non-null). On failure, returns a positive
|
2013-06-19 16:58:44 -07:00
|
|
|
|
* errno value and sets '*port_nop' to ODPP_NONE (if 'port_nop' is
|
2012-07-27 23:58:24 -07:00
|
|
|
|
* non-null). */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2013-06-19 16:58:44 -07:00
|
|
|
|
dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2010-12-03 14:41:38 -08:00
|
|
|
|
const char *netdev_name = netdev_get_name(netdev);
|
2013-06-19 16:58:44 -07:00
|
|
|
|
odp_port_t port_no = ODPP_NONE;
|
2009-06-17 14:26:19 -07:00
|
|
|
|
int error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_port_add);
|
2009-06-17 14:26:19 -07:00
|
|
|
|
|
2012-07-27 23:58:24 -07:00
|
|
|
|
if (port_nop) {
|
|
|
|
|
port_no = *port_nop;
|
|
|
|
|
}
|
|
|
|
|
|
2010-12-03 14:41:38 -08:00
|
|
|
|
error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
|
2009-06-17 14:26:19 -07:00
|
|
|
|
if (!error) {
|
2012-09-26 16:22:47 -07:00
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32,
|
2010-12-03 14:41:38 -08:00
|
|
|
|
dpif_name(dpif), netdev_name, port_no);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
} else {
|
2009-06-17 14:26:19 -07:00
|
|
|
|
VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
|
2013-06-24 10:54:49 -07:00
|
|
|
|
dpif_name(dpif), netdev_name, ovs_strerror(error));
|
2013-06-19 16:58:44 -07:00
|
|
|
|
port_no = ODPP_NONE;
|
2009-06-17 14:26:19 -07:00
|
|
|
|
}
|
|
|
|
|
if (port_nop) {
|
|
|
|
|
*port_nop = port_no;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2009-06-17 14:26:19 -07:00
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful,
|
|
|
|
|
* otherwise a positive errno value. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2013-06-19 16:58:44 -07:00
|
|
|
|
dpif_port_del(struct dpif *dpif, odp_port_t port_no)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
|
COVERAGE_INC(dpif_port_del);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
|
error = dpif->dpif_class->port_del(dpif, port_no);
|
2010-12-03 14:42:28 -08:00
|
|
|
|
if (!error) {
|
2012-09-26 16:22:47 -07:00
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: port_del(%"PRIu32")",
|
2010-12-03 14:42:28 -08:00
|
|
|
|
dpif_name(dpif), port_no);
|
|
|
|
|
} else {
|
|
|
|
|
log_operation(dpif, "port_del", error);
|
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-01-23 18:48:02 -08:00
|
|
|
|
/* Makes a deep copy of 'src' into 'dst'. */
|
|
|
|
|
void
|
|
|
|
|
dpif_port_clone(struct dpif_port *dst, const struct dpif_port *src)
|
|
|
|
|
{
|
|
|
|
|
dst->name = xstrdup(src->name);
|
|
|
|
|
dst->type = xstrdup(src->type);
|
|
|
|
|
dst->port_no = src->port_no;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Frees memory allocated to members of 'dpif_port'.
|
|
|
|
|
*
|
|
|
|
|
* Do not call this function on a dpif_port obtained from
|
|
|
|
|
* dpif_port_dump_next(): that function retains ownership of the data in the
|
|
|
|
|
* dpif_port. */
|
|
|
|
|
void
|
|
|
|
|
dpif_port_destroy(struct dpif_port *dpif_port)
|
|
|
|
|
{
|
|
|
|
|
free(dpif_port->name);
|
|
|
|
|
free(dpif_port->type);
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-17 23:11:53 -07:00
|
|
|
|
/* Checks if port named 'devname' exists in 'dpif'. If so, returns
|
|
|
|
|
* true; otherwise, returns false. */
|
|
|
|
|
bool
|
|
|
|
|
dpif_port_exists(const struct dpif *dpif, const char *devname)
|
|
|
|
|
{
|
|
|
|
|
int error = dpif->dpif_class->port_query_by_name(dpif, devname, NULL);
|
2012-11-15 13:05:58 -08:00
|
|
|
|
if (error != 0 && error != ENOENT && error != ENODEV) {
|
2012-10-17 23:11:53 -07:00
|
|
|
|
VLOG_WARN_RL(&error_rl, "%s: failed to query port %s: %s",
|
2013-06-24 10:54:49 -07:00
|
|
|
|
dpif_name(dpif), devname, ovs_strerror(error));
|
2012-10-17 23:11:53 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return !error;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and
|
|
|
|
|
* initializes '*port' appropriately; on failure, returns a positive errno
|
2011-01-23 18:48:02 -08:00
|
|
|
|
* value.
|
|
|
|
|
*
|
|
|
|
|
* The caller owns the data in 'port' and must free it with
|
|
|
|
|
* dpif_port_destroy() when it is no longer needed. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2013-06-19 16:58:44 -07:00
|
|
|
|
dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
|
2011-01-23 18:48:02 -08:00
|
|
|
|
struct dpif_port *port)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
if (!error) {
|
2012-09-26 16:22:47 -07:00
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu32" is device %s",
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_name(dpif), port_no, port->name);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
} else {
|
2009-06-17 14:35:35 -07:00
|
|
|
|
memset(port, 0, sizeof *port);
|
2012-09-26 16:22:47 -07:00
|
|
|
|
VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s",
|
2013-06-24 10:54:49 -07:00
|
|
|
|
dpif_name(dpif), port_no, ovs_strerror(error));
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Looks up port named 'devname' in 'dpif'. On success, returns 0 and
|
|
|
|
|
* initializes '*port' appropriately; on failure, returns a positive errno
|
2011-01-23 18:48:02 -08:00
|
|
|
|
* value.
|
|
|
|
|
*
|
|
|
|
|
* The caller owns the data in 'port' and must free it with
|
|
|
|
|
* dpif_port_destroy() when it is no longer needed. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
|
|
|
|
dpif_port_query_by_name(const struct dpif *dpif, const char *devname,
|
2011-01-23 18:48:02 -08:00
|
|
|
|
struct dpif_port *port)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
int error = dpif->dpif_class->port_query_by_name(dpif, devname, port);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
if (!error) {
|
2012-09-26 16:22:47 -07:00
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu32,
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_name(dpif), devname, port->port_no);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
} else {
|
2009-06-17 14:35:35 -07:00
|
|
|
|
memset(port, 0, sizeof *port);
|
|
|
|
|
|
2011-04-07 14:43:14 -07:00
|
|
|
|
/* For ENOENT or ENODEV we use DBG level because the caller is probably
|
|
|
|
|
* interested in whether 'dpif' actually has a port 'devname', so that
|
|
|
|
|
* it's not an issue worth logging if it doesn't. Other errors are
|
|
|
|
|
* uncommon and more likely to indicate a real problem. */
|
|
|
|
|
VLOG_RL(&error_rl,
|
|
|
|
|
error == ENOENT || error == ENODEV ? VLL_DBG : VLL_WARN,
|
|
|
|
|
"%s: failed to query port %s: %s",
|
2013-06-24 10:54:49 -07:00
|
|
|
|
dpif_name(dpif), devname, ovs_strerror(error));
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-10-12 16:24:54 -07:00
|
|
|
|
/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions
|
|
|
|
|
* as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose
|
|
|
|
|
* packets arrived on port 'port_no'.
|
|
|
|
|
*
|
2013-06-19 16:58:44 -07:00
|
|
|
|
* A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not
|
2012-05-05 11:07:42 -07:00
|
|
|
|
* allocated to any port, that the client may use for special purposes.
|
|
|
|
|
*
|
2011-10-12 16:24:54 -07:00
|
|
|
|
* The return value is only meaningful when DPIF_UC_ACTION has been enabled in
|
|
|
|
|
* the 'dpif''s listen mask. It is allowed to change when DPIF_UC_ACTION is
|
|
|
|
|
* disabled and then re-enabled, so a client that does that must be prepared to
|
|
|
|
|
* update all of the flows that it installed that contain
|
|
|
|
|
* OVS_ACTION_ATTR_USERSPACE actions. */
|
|
|
|
|
uint32_t
|
2013-06-19 16:58:44 -07:00
|
|
|
|
dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no)
|
2011-10-12 16:24:54 -07:00
|
|
|
|
{
|
|
|
|
|
return (dpif->dpif_class->port_get_pid
|
|
|
|
|
? (dpif->dpif_class->port_get_pid)(dpif, port_no)
|
|
|
|
|
: 0);
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies
|
|
|
|
|
* the port's name into the 'name_size' bytes in 'name', ensuring that the
|
|
|
|
|
* result is null-terminated. On failure, returns a positive errno value and
|
|
|
|
|
* makes 'name' the empty string. */
|
2009-06-15 16:51:46 -07:00
|
|
|
|
int
|
2013-06-19 16:58:44 -07:00
|
|
|
|
dpif_port_get_name(struct dpif *dpif, odp_port_t port_no,
|
2009-06-15 16:51:46 -07:00
|
|
|
|
char *name, size_t name_size)
|
|
|
|
|
{
|
2011-01-23 18:48:02 -08:00
|
|
|
|
struct dpif_port port;
|
2009-06-15 16:51:46 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
2012-11-06 13:14:55 -08:00
|
|
|
|
ovs_assert(name_size > 0);
|
2009-06-15 16:51:46 -07:00
|
|
|
|
|
|
|
|
|
error = dpif_port_query_by_number(dpif, port_no, &port);
|
|
|
|
|
if (!error) {
|
2011-01-23 18:48:02 -08:00
|
|
|
|
ovs_strlcpy(name, port.name, name_size);
|
|
|
|
|
dpif_port_destroy(&port);
|
2009-06-15 16:51:46 -07:00
|
|
|
|
} else {
|
|
|
|
|
*name = '\0';
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
/* Initializes 'dump' to begin dumping the ports in a dpif.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
* This function provides no status indication. An error status for the entire
|
|
|
|
|
* dump operation is provided when it is completed by calling
|
|
|
|
|
* dpif_port_dump_done().
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
dpif_port_dump_start(struct dpif_port_dump *dump, const struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
dump->dpif = dpif;
|
|
|
|
|
dump->error = dpif->dpif_class->port_dump_start(dpif, &dump->state);
|
|
|
|
|
log_operation(dpif, "port_dump_start", dump->error);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Attempts to retrieve another port from 'dump', which must have been
|
2011-01-23 18:48:02 -08:00
|
|
|
|
* initialized with dpif_port_dump_start(). On success, stores a new dpif_port
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
* into 'port' and returns true. On failure, returns false.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
* Failure might indicate an actual error or merely that the last port has been
|
|
|
|
|
* dumped. An error status for the entire dump operation is provided when it
|
2011-01-23 18:48:02 -08:00
|
|
|
|
* is completed by calling dpif_port_dump_done().
|
|
|
|
|
*
|
|
|
|
|
* The dpif owns the data stored in 'port'. It will remain valid until at
|
|
|
|
|
* least the next time 'dump' is passed to dpif_port_dump_next() or
|
|
|
|
|
* dpif_port_dump_done(). */
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
bool
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_port_dump_next(struct dpif_port_dump *dump, struct dpif_port *port)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
const struct dpif *dpif = dump->dpif;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
if (dump->error) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2009-06-17 14:28:07 -07:00
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
dump->error = dpif->dpif_class->port_dump_next(dpif, dump->state, port);
|
|
|
|
|
if (dump->error == EOF) {
|
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all ports", dpif_name(dpif));
|
|
|
|
|
} else {
|
|
|
|
|
log_operation(dpif, "port_dump_next", dump->error);
|
|
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
if (dump->error) {
|
|
|
|
|
dpif->dpif_class->port_dump_done(dpif, dump->state);
|
|
|
|
|
return false;
|
2009-06-17 14:28:07 -07:00
|
|
|
|
}
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
return true;
|
|
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
/* Completes port table dump operation 'dump', which must have been initialized
|
|
|
|
|
* with dpif_port_dump_start(). Returns 0 if the dump operation was
|
|
|
|
|
* error-free, otherwise a positive errno value describing the problem. */
|
|
|
|
|
int
|
|
|
|
|
dpif_port_dump_done(struct dpif_port_dump *dump)
|
|
|
|
|
{
|
|
|
|
|
const struct dpif *dpif = dump->dpif;
|
|
|
|
|
if (!dump->error) {
|
|
|
|
|
dump->error = dpif->dpif_class->port_dump_done(dpif, dump->state);
|
|
|
|
|
log_operation(dpif, "port_dump_done", dump->error);
|
2009-06-17 14:28:07 -07:00
|
|
|
|
}
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
return dump->error == EOF ? 0 : dump->error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-24 10:24:09 -07:00
|
|
|
|
/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
|
|
|
|
|
* 'dpif' has changed, this function does one of the following:
|
|
|
|
|
*
|
|
|
|
|
* - Stores the name of the device that was added to or deleted from 'dpif' in
|
|
|
|
|
* '*devnamep' and returns 0. The caller is responsible for freeing
|
|
|
|
|
* '*devnamep' (with free()) when it no longer needs it.
|
|
|
|
|
*
|
|
|
|
|
* - Returns ENOBUFS and sets '*devnamep' to NULL.
|
|
|
|
|
*
|
|
|
|
|
* This function may also return 'false positives', where it returns 0 and
|
|
|
|
|
* '*devnamep' names a device that was not actually added or deleted or it
|
|
|
|
|
* returns ENOBUFS without any change.
|
|
|
|
|
*
|
|
|
|
|
* Returns EAGAIN if the set of ports in 'dpif' has not changed. May also
|
|
|
|
|
* return other positive errno values to indicate that something has gone
|
|
|
|
|
* wrong. */
|
|
|
|
|
int
|
|
|
|
|
dpif_port_poll(const struct dpif *dpif, char **devnamep)
|
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
int error = dpif->dpif_class->port_poll(dpif, devnamep);
|
2009-06-24 10:24:09 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
*devnamep = NULL;
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when port_poll(dpif) will return a
|
|
|
|
|
* value other than EAGAIN. */
|
|
|
|
|
void
|
|
|
|
|
dpif_port_poll_wait(const struct dpif *dpif)
|
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
dpif->dpif_class->port_poll_wait(dpif);
|
2009-06-24 10:24:09 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-09-29 15:36:14 -07:00
|
|
|
|
/* Extracts the flow stats for a packet. The 'flow' and 'packet'
|
2012-08-17 23:27:39 -07:00
|
|
|
|
* arguments must have been initialized through a call to flow_extract().
|
|
|
|
|
* 'used' is stored into stats->used. */
|
2011-09-29 15:36:14 -07:00
|
|
|
|
void
|
2012-01-31 12:56:49 -08:00
|
|
|
|
dpif_flow_stats_extract(const struct flow *flow, const struct ofpbuf *packet,
|
2012-08-17 23:27:39 -07:00
|
|
|
|
long long int used, struct dpif_flow_stats *stats)
|
2011-09-29 15:36:14 -07:00
|
|
|
|
{
|
2012-01-19 16:55:50 -08:00
|
|
|
|
stats->tcp_flags = packet_get_tcp_flags(packet, flow);
|
2011-09-29 15:36:14 -07:00
|
|
|
|
stats->n_bytes = packet->size;
|
|
|
|
|
stats->n_packets = 1;
|
2012-08-17 23:27:39 -07:00
|
|
|
|
stats->used = used;
|
2011-09-29 15:36:14 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-01-26 07:11:50 -08:00
|
|
|
|
/* Appends a human-readable representation of 'stats' to 's'. */
|
|
|
|
|
void
|
|
|
|
|
dpif_flow_stats_format(const struct dpif_flow_stats *stats, struct ds *s)
|
|
|
|
|
{
|
|
|
|
|
ds_put_format(s, "packets:%"PRIu64", bytes:%"PRIu64", used:",
|
|
|
|
|
stats->n_packets, stats->n_bytes);
|
|
|
|
|
if (stats->used) {
|
|
|
|
|
ds_put_format(s, "%.3fs", (time_msec() - stats->used) / 1000.0);
|
|
|
|
|
} else {
|
|
|
|
|
ds_put_format(s, "never");
|
|
|
|
|
}
|
2012-04-05 10:24:56 -07:00
|
|
|
|
if (stats->tcp_flags) {
|
|
|
|
|
ds_put_cstr(s, ", flags:");
|
|
|
|
|
packet_format_tcp_flags(s, stats->tcp_flags);
|
|
|
|
|
}
|
2011-01-26 07:11:50 -08:00
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a
|
|
|
|
|
* positive errno value. */
|
|
|
|
|
int
|
|
|
|
|
dpif_flow_flush(struct dpif *dpif)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_flow_flush);
|
|
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
|
error = dpif->dpif_class->flow_flush(dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
log_operation(dpif, "flow_flush", error);
|
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
/* Queries 'dpif' for a flow entry. The flow is specified by the Netlink
|
2011-08-18 10:35:40 -07:00
|
|
|
|
* attributes with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* 'key'.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* Returns 0 if successful. If no flow matches, returns ENOENT. On other
|
|
|
|
|
* failure, returns a positive errno value.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* If 'actionsp' is nonnull, then on success '*actionsp' will be set to an
|
|
|
|
|
* ofpbuf owned by the caller that contains the Netlink attributes for the
|
|
|
|
|
* flow's actions. The caller must free the ofpbuf (with ofpbuf_delete()) when
|
|
|
|
|
* it is no longer needed.
|
|
|
|
|
*
|
|
|
|
|
* If 'stats' is nonnull, then on success it will be updated with the flow's
|
|
|
|
|
* statistics. */
|
2009-06-17 14:35:35 -07:00
|
|
|
|
int
|
2011-01-17 14:43:30 -08:00
|
|
|
|
dpif_flow_get(const struct dpif *dpif,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *key, size_t key_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_flow_get);
|
|
|
|
|
|
2011-01-17 14:43:30 -08:00
|
|
|
|
error = dpif->dpif_class->flow_get(dpif, key, key_len, actionsp, stats);
|
2010-04-13 16:49:22 -07:00
|
|
|
|
if (error) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (actionsp) {
|
|
|
|
|
*actionsp = NULL;
|
|
|
|
|
}
|
|
|
|
|
if (stats) {
|
|
|
|
|
memset(stats, 0, sizeof *stats);
|
|
|
|
|
}
|
2010-04-13 16:49:22 -07:00
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
if (should_log_flow_message(error)) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *actions;
|
|
|
|
|
size_t actions_len;
|
|
|
|
|
|
|
|
|
|
if (!error && actionsp) {
|
|
|
|
|
actions = (*actionsp)->data;
|
|
|
|
|
actions_len = (*actionsp)->size;
|
|
|
|
|
} else {
|
|
|
|
|
actions = NULL;
|
|
|
|
|
actions_len = 0;
|
|
|
|
|
}
|
2013-06-20 13:43:56 -07:00
|
|
|
|
log_flow_message(dpif, error, "flow_get", key, key_len,
|
|
|
|
|
NULL, 0, stats, actions, actions_len);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
return error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-12-26 14:39:03 -08:00
|
|
|
|
static int
|
|
|
|
|
dpif_flow_put__(struct dpif *dpif, const struct dpif_flow_put *put)
|
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_flow_put);
|
2012-11-06 13:14:55 -08:00
|
|
|
|
ovs_assert(!(put->flags & ~(DPIF_FP_CREATE | DPIF_FP_MODIFY
|
|
|
|
|
| DPIF_FP_ZERO_STATS)));
|
2011-12-26 14:39:03 -08:00
|
|
|
|
|
|
|
|
|
error = dpif->dpif_class->flow_put(dpif, put);
|
|
|
|
|
if (error && put->stats) {
|
|
|
|
|
memset(put->stats, 0, sizeof *put->stats);
|
|
|
|
|
}
|
|
|
|
|
log_flow_put_message(dpif, put, error);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
/* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink
|
2013-06-19 07:15:10 +00:00
|
|
|
|
* attribute OVS_FLOW_ATTR_KEY with types OVS_KEY_ATTR_* in the 'key_len' bytes
|
2013-11-12 17:10:16 -08:00
|
|
|
|
* starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in
|
|
|
|
|
* the 'mask_len' bytes starting at 'mask'. The associated actions are
|
|
|
|
|
* specified by the Netlink attributes with types OVS_ACTION_ATTR_* in the
|
|
|
|
|
* 'actions_len' bytes starting at 'actions'.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* - If the flow's key does not exist in 'dpif', then the flow will be added if
|
2011-01-26 07:12:24 -08:00
|
|
|
|
* 'flags' includes DPIF_FP_CREATE. Otherwise the operation will fail with
|
2009-06-17 14:35:35 -07:00
|
|
|
|
* ENOENT.
|
|
|
|
|
*
|
2013-11-12 17:10:16 -08:00
|
|
|
|
* The datapath may reject attempts to insert overlapping flows with EINVAL
|
|
|
|
|
* or EEXIST, but clients should not rely on this: avoiding overlapping flows
|
|
|
|
|
* is primarily the client's responsibility.
|
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* If the operation succeeds, then 'stats', if nonnull, will be zeroed.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* - If the flow's key does exist in 'dpif', then the flow's actions will be
|
2011-01-26 07:12:24 -08:00
|
|
|
|
* updated if 'flags' includes DPIF_FP_MODIFY. Otherwise the operation will
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* fail with EEXIST. If the flow's actions are updated, then its statistics
|
2011-01-26 07:12:24 -08:00
|
|
|
|
* will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and left as-is
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* otherwise.
|
|
|
|
|
*
|
|
|
|
|
* If the operation succeeds, then 'stats', if nonnull, will be set to the
|
|
|
|
|
* flow's statistics before the update.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*/
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2011-01-26 07:12:24 -08:00
|
|
|
|
dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *key, size_t key_len,
|
2013-06-19 07:15:10 +00:00
|
|
|
|
const struct nlattr *mask, size_t mask_len,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *actions, size_t actions_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct dpif_flow_stats *stats)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2011-12-26 14:39:03 -08:00
|
|
|
|
struct dpif_flow_put put;
|
|
|
|
|
|
|
|
|
|
put.flags = flags;
|
|
|
|
|
put.key = key;
|
|
|
|
|
put.key_len = key_len;
|
2013-06-19 07:15:10 +00:00
|
|
|
|
put.mask = mask;
|
|
|
|
|
put.mask_len = mask_len;
|
2011-12-26 14:39:03 -08:00
|
|
|
|
put.actions = actions;
|
|
|
|
|
put.actions_len = actions_len;
|
|
|
|
|
put.stats = stats;
|
|
|
|
|
return dpif_flow_put__(dpif, &put);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2012-04-17 21:52:10 -07:00
|
|
|
|
static int
|
|
|
|
|
dpif_flow_del__(struct dpif *dpif, struct dpif_flow_del *del)
|
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_flow_del);
|
|
|
|
|
|
|
|
|
|
error = dpif->dpif_class->flow_del(dpif, del);
|
|
|
|
|
if (error && del->stats) {
|
|
|
|
|
memset(del->stats, 0, sizeof *del->stats);
|
|
|
|
|
}
|
|
|
|
|
log_flow_del_message(dpif, del, error);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
/* Deletes a flow from 'dpif' and returns 0, or returns ENOENT if 'dpif' does
|
|
|
|
|
* not contain such a flow. The flow is specified by the Netlink attributes
|
2011-08-18 10:35:40 -07:00
|
|
|
|
* with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at 'key'.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* If the operation succeeds, then 'stats', if nonnull, will be set to the
|
|
|
|
|
* flow's statistics before its deletion. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2011-01-26 07:03:39 -08:00
|
|
|
|
dpif_flow_del(struct dpif *dpif,
|
|
|
|
|
const struct nlattr *key, size_t key_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct dpif_flow_stats *stats)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2012-04-17 21:52:10 -07:00
|
|
|
|
struct dpif_flow_del del;
|
2009-06-17 12:41:30 -07:00
|
|
|
|
|
2012-04-17 21:52:10 -07:00
|
|
|
|
del.key = key;
|
|
|
|
|
del.key_len = key_len;
|
|
|
|
|
del.stats = stats;
|
|
|
|
|
return dpif_flow_del__(dpif, &del);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
/* Initializes 'dump' to begin dumping the flows in a dpif.
|
|
|
|
|
*
|
|
|
|
|
* This function provides no status indication. An error status for the entire
|
|
|
|
|
* dump operation is provided when it is completed by calling
|
|
|
|
|
* dpif_flow_dump_done().
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
dump->dpif = dpif;
|
|
|
|
|
dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state);
|
|
|
|
|
log_operation(dpif, "flow_dump_start", dump->error);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
/* Attempts to retrieve another flow from 'dump', which must have been
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* initialized with dpif_flow_dump_start(). On success, updates the output
|
|
|
|
|
* parameters as described below and returns true. Otherwise, returns false.
|
|
|
|
|
* Failure might indicate an actual error or merely the end of the flow table.
|
|
|
|
|
* An error status for the entire dump operation is provided when it is
|
|
|
|
|
* completed by calling dpif_flow_dump_done().
|
|
|
|
|
*
|
|
|
|
|
* On success, if 'key' and 'key_len' are nonnull then '*key' and '*key_len'
|
2011-08-18 10:35:40 -07:00
|
|
|
|
* will be set to Netlink attributes with types OVS_KEY_ATTR_* representing the
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* dumped flow's key. If 'actions' and 'actions_len' are nonnull then they are
|
2011-08-18 10:35:40 -07:00
|
|
|
|
* set to Netlink attributes with types OVS_ACTION_ATTR_* representing the
|
2011-01-23 21:56:00 -08:00
|
|
|
|
* dumped flow's actions. If 'stats' is nonnull then it will be set to the
|
|
|
|
|
* dumped flow's statistics.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
|
* All of the returned data is owned by 'dpif', not by the caller, and the
|
|
|
|
|
* caller must not modify or free it. 'dpif' guarantees that it remains
|
|
|
|
|
* accessible and unchanging until at least the next call to 'flow_dump_next'
|
|
|
|
|
* or 'flow_dump_done' for 'dump'. */
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
bool
|
2011-01-26 07:03:39 -08:00
|
|
|
|
dpif_flow_dump_next(struct dpif_flow_dump *dump,
|
|
|
|
|
const struct nlattr **key, size_t *key_len,
|
2013-06-19 07:15:10 +00:00
|
|
|
|
const struct nlattr **mask, size_t *mask_len,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr **actions, size_t *actions_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
const struct dpif_flow_stats **stats)
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
{
|
|
|
|
|
const struct dpif *dpif = dump->dpif;
|
2011-01-26 07:03:39 -08:00
|
|
|
|
int error = dump->error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (!error) {
|
|
|
|
|
error = dpif->dpif_class->flow_dump_next(dpif, dump->state,
|
|
|
|
|
key, key_len,
|
2013-06-19 07:15:10 +00:00
|
|
|
|
mask, mask_len,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
actions, actions_len,
|
|
|
|
|
stats);
|
|
|
|
|
if (error) {
|
|
|
|
|
dpif->dpif_class->flow_dump_done(dpif, dump->state);
|
|
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
if (key) {
|
|
|
|
|
*key = NULL;
|
|
|
|
|
*key_len = 0;
|
2011-01-23 18:44:44 -08:00
|
|
|
|
}
|
2013-06-19 07:15:10 +00:00
|
|
|
|
if (mask) {
|
|
|
|
|
*mask = NULL;
|
|
|
|
|
*mask_len = 0;
|
|
|
|
|
}
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (actions) {
|
|
|
|
|
*actions = NULL;
|
|
|
|
|
*actions_len = 0;
|
|
|
|
|
}
|
|
|
|
|
if (stats) {
|
|
|
|
|
*stats = NULL;
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (!dump->error) {
|
|
|
|
|
if (error == EOF) {
|
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
|
|
|
|
|
} else if (should_log_flow_message(error)) {
|
|
|
|
|
log_flow_message(dpif, error, "flow_dump",
|
|
|
|
|
key ? *key : NULL, key ? *key_len : 0,
|
2013-06-20 13:43:56 -07:00
|
|
|
|
mask ? *mask : NULL, mask ? *mask_len : 0,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
stats ? *stats : NULL, actions ? *actions : NULL,
|
|
|
|
|
actions ? *actions_len : 0);
|
|
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2011-01-26 07:03:39 -08:00
|
|
|
|
dump->error = error;
|
|
|
|
|
return !error;
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Completes flow table dump operation 'dump', which must have been initialized
|
|
|
|
|
* with dpif_flow_dump_start(). Returns 0 if the dump operation was
|
|
|
|
|
* error-free, otherwise a positive errno value describing the problem. */
|
|
|
|
|
int
|
|
|
|
|
dpif_flow_dump_done(struct dpif_flow_dump *dump)
|
|
|
|
|
{
|
|
|
|
|
const struct dpif *dpif = dump->dpif;
|
|
|
|
|
if (!dump->error) {
|
|
|
|
|
dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state);
|
|
|
|
|
log_operation(dpif, "flow_dump_done", dump->error);
|
|
|
|
|
}
|
|
|
|
|
return dump->error == EOF ? 0 : dump->error;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-09 17:28:05 -07:00
|
|
|
|
/* Context handed by dpif_execute_with_help() to odp_execute_actions() and
 * from there to the dpif_execute_helper_*() callbacks. */
struct dpif_execute_helper_aux {
    struct dpif *dpif;          /* Datapath whose provider executes actions. */
    int error;                  /* 0, or the most recent nonzero value
                                 * returned by the provider's execute. */
};
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_execute_helper_execute__(void *aux_, struct ofpbuf *packet,
|
|
|
|
|
const struct flow *flow,
|
|
|
|
|
const struct nlattr *actions, size_t actions_len)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_execute_helper_aux *aux = aux_;
|
|
|
|
|
struct dpif_execute execute;
|
|
|
|
|
struct odputil_keybuf key_stub;
|
|
|
|
|
struct ofpbuf key;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
ofpbuf_use_stub(&key, &key_stub, sizeof key_stub);
|
|
|
|
|
odp_flow_key_from_flow(&key, flow, flow->in_port.odp_port);
|
|
|
|
|
|
|
|
|
|
execute.key = key.data;
|
|
|
|
|
execute.key_len = key.size;
|
|
|
|
|
execute.actions = actions;
|
|
|
|
|
execute.actions_len = actions_len;
|
|
|
|
|
execute.packet = packet;
|
|
|
|
|
execute.needs_help = false;
|
|
|
|
|
|
|
|
|
|
error = aux->dpif->dpif_class->execute(aux->dpif, &execute);
|
|
|
|
|
if (error) {
|
|
|
|
|
aux->error = error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_execute_helper_output_cb(void *aux, struct ofpbuf *packet,
|
|
|
|
|
const struct flow *flow, odp_port_t out_port)
|
|
|
|
|
{
|
|
|
|
|
uint64_t actions_stub[DIV_ROUND_UP(NL_A_U32_SIZE, 8)];
|
|
|
|
|
struct ofpbuf actions;
|
|
|
|
|
|
|
|
|
|
ofpbuf_use_stack(&actions, actions_stub, sizeof actions_stub);
|
|
|
|
|
nl_msg_put_u32(&actions, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(out_port));
|
|
|
|
|
|
|
|
|
|
dpif_execute_helper_execute__(aux, packet, flow,
|
|
|
|
|
actions.data, actions.size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet,
|
|
|
|
|
const struct flow *flow,
|
2013-12-16 08:14:52 -08:00
|
|
|
|
const struct nlattr *action,
|
|
|
|
|
bool may_steal OVS_UNUSED)
|
2013-10-09 17:28:05 -07:00
|
|
|
|
{
|
|
|
|
|
dpif_execute_helper_execute__(aux, packet, flow,
|
|
|
|
|
action, NLA_ALIGN(action->nla_len));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Executes 'execute' by performing most of the actions in userspace and
|
|
|
|
|
* passing the fully constructed packets to 'dpif' for output and userspace
|
|
|
|
|
* actions.
|
|
|
|
|
*
|
|
|
|
|
* This helps with actions that a given 'dpif' doesn't implement directly. */
|
|
|
|
|
static int
|
|
|
|
|
dpif_execute_with_help(struct dpif *dpif, const struct dpif_execute *execute)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_execute_helper_aux aux;
|
|
|
|
|
enum odp_key_fitness fit;
|
|
|
|
|
struct flow flow;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_execute_with_help);
|
|
|
|
|
|
|
|
|
|
fit = odp_flow_key_to_flow(execute->key, execute->key_len, &flow);
|
|
|
|
|
if (fit == ODP_FIT_ERROR) {
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aux.dpif = dpif;
|
|
|
|
|
aux.error = 0;
|
|
|
|
|
|
2013-12-16 08:14:52 -08:00
|
|
|
|
odp_execute_actions(&aux, execute->packet, &flow,
|
2013-10-09 17:28:05 -07:00
|
|
|
|
execute->actions, execute->actions_len,
|
|
|
|
|
dpif_execute_helper_output_cb,
|
|
|
|
|
dpif_execute_helper_userspace_cb);
|
|
|
|
|
return aux.error;
|
|
|
|
|
}
|
|
|
|
|
|
2011-12-26 14:39:03 -08:00
|
|
|
|
static int
|
|
|
|
|
dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
|
|
|
|
|
{
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
COVERAGE_INC(dpif_execute);
|
|
|
|
|
if (execute->actions_len > 0) {
|
2013-10-09 17:28:05 -07:00
|
|
|
|
error = (execute->needs_help
|
|
|
|
|
? dpif_execute_with_help(dpif, execute)
|
|
|
|
|
: dpif->dpif_class->execute(dpif, execute));
|
2011-12-26 14:39:03 -08:00
|
|
|
|
} else {
|
|
|
|
|
error = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log_execute_message(dpif, execute, error);
|
|
|
|
|
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2010-12-10 10:40:58 -08:00
|
|
|
|
/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
|
2011-06-01 13:39:51 -07:00
|
|
|
|
* the Ethernet frame specified in 'packet' taken from the flow specified in
|
|
|
|
|
* the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but
|
|
|
|
|
* it contains some metadata that cannot be recovered from 'packet', such as
|
2012-09-13 20:11:08 -07:00
|
|
|
|
* tunnel and in_port.)
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2013-10-09 17:28:05 -07:00
|
|
|
|
* Some dpif providers do not implement every action. The Linux kernel
|
|
|
|
|
* datapath, in particular, does not implement ARP field modification. If
|
|
|
|
|
* 'needs_help' is true, the dpif layer executes in userspace all of the
|
|
|
|
|
* actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
|
|
|
|
|
* OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif
|
|
|
|
|
* implementation.
|
|
|
|
|
*
|
2013-11-02 08:43:14 -07:00
|
|
|
|
* This works even if 'actions_len' is too long for a Netlink attribute.
|
|
|
|
|
*
|
2009-06-17 14:35:35 -07:00
|
|
|
|
* Returns 0 if successful, otherwise a positive errno value. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2010-10-08 16:36:13 -07:00
|
|
|
|
dpif_execute(struct dpif *dpif,
|
2011-06-01 13:39:51 -07:00
|
|
|
|
const struct nlattr *key, size_t key_len,
|
2010-12-10 10:40:58 -08:00
|
|
|
|
const struct nlattr *actions, size_t actions_len,
|
2013-12-16 08:14:52 -08:00
|
|
|
|
struct ofpbuf *buf, bool needs_help)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2011-12-26 14:39:03 -08:00
|
|
|
|
struct dpif_execute execute;
|
|
|
|
|
|
|
|
|
|
execute.key = key;
|
|
|
|
|
execute.key_len = key_len;
|
|
|
|
|
execute.actions = actions;
|
|
|
|
|
execute.actions_len = actions_len;
|
|
|
|
|
execute.packet = buf;
|
2013-11-02 08:43:14 -07:00
|
|
|
|
execute.needs_help = needs_help || nl_attr_oversized(actions_len);
|
2011-12-26 14:39:03 -08:00
|
|
|
|
return dpif_execute__(dpif, &execute);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-09-27 15:08:50 -07:00
|
|
|
|
/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in
|
|
|
|
|
* which they are specified, placing each operation's results in the "output"
|
|
|
|
|
* members documented in comments.
|
|
|
|
|
*
|
|
|
|
|
* This function exists because some datapaths can perform batched operations
|
|
|
|
|
* faster than individual operations. */
|
|
|
|
|
void
|
2011-12-26 14:17:55 -08:00
|
|
|
|
dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
|
2011-09-27 15:08:50 -07:00
|
|
|
|
{
|
|
|
|
|
if (dpif->dpif_class->operate) {
|
2013-10-09 17:28:05 -07:00
|
|
|
|
while (n_ops > 0) {
|
|
|
|
|
size_t chunk;
|
|
|
|
|
|
|
|
|
|
/* Count 'chunk', the number of ops that can be executed without
|
|
|
|
|
* needing any help. Ops that need help should be rare, so we
|
|
|
|
|
* expect this to ordinarily be 'n_ops', that is, all the ops. */
|
|
|
|
|
for (chunk = 0; chunk < n_ops; chunk++) {
|
|
|
|
|
struct dpif_op *op = ops[chunk];
|
|
|
|
|
|
|
|
|
|
if (op->type == DPIF_OP_EXECUTE && op->u.execute.needs_help) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (chunk) {
|
|
|
|
|
/* Execute a chunk full of ops that the dpif provider can
|
|
|
|
|
* handle itself, without help. */
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
dpif->dpif_class->operate(dpif, ops, chunk);
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < chunk; i++) {
|
|
|
|
|
struct dpif_op *op = ops[i];
|
|
|
|
|
|
|
|
|
|
switch (op->type) {
|
|
|
|
|
case DPIF_OP_FLOW_PUT:
|
|
|
|
|
log_flow_put_message(dpif, &op->u.flow_put, op->error);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case DPIF_OP_FLOW_DEL:
|
|
|
|
|
log_flow_del_message(dpif, &op->u.flow_del, op->error);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case DPIF_OP_EXECUTE:
|
|
|
|
|
log_execute_message(dpif, &op->u.execute, op->error);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ops += chunk;
|
|
|
|
|
n_ops -= chunk;
|
|
|
|
|
} else {
|
|
|
|
|
/* Help the dpif provider to execute one op. */
|
|
|
|
|
struct dpif_op *op = ops[0];
|
|
|
|
|
|
|
|
|
|
op->error = dpif_execute__(dpif, &op->u.execute);
|
|
|
|
|
ops++;
|
|
|
|
|
n_ops--;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
size_t i;
|
2011-12-26 14:42:48 -08:00
|
|
|
|
|
|
|
|
|
for (i = 0; i < n_ops; i++) {
|
|
|
|
|
struct dpif_op *op = ops[i];
|
|
|
|
|
|
|
|
|
|
switch (op->type) {
|
|
|
|
|
case DPIF_OP_FLOW_PUT:
|
2013-10-09 17:28:05 -07:00
|
|
|
|
op->error = dpif_flow_put__(dpif, &op->u.flow_put);
|
2011-12-26 14:42:48 -08:00
|
|
|
|
break;
|
|
|
|
|
|
2012-04-17 21:52:10 -07:00
|
|
|
|
case DPIF_OP_FLOW_DEL:
|
2013-10-09 17:28:05 -07:00
|
|
|
|
op->error = dpif_flow_del__(dpif, &op->u.flow_del);
|
2012-04-17 21:52:10 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2011-12-26 14:42:48 -08:00
|
|
|
|
case DPIF_OP_EXECUTE:
|
2013-10-09 17:28:05 -07:00
|
|
|
|
op->error = dpif_execute__(dpif, &op->u.execute);
|
2011-12-26 14:42:48 -08:00
|
|
|
|
break;
|
2012-04-17 21:52:10 -07:00
|
|
|
|
|
2013-10-09 17:28:05 -07:00
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2013-10-09 17:28:05 -07:00
|
|
|
|
}
|
2011-09-27 15:08:50 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-06-08 14:03:47 -07:00
|
|
|
|
/* Returns a string that represents 'type', for use in log messages. */
|
|
|
|
|
const char *
|
|
|
|
|
dpif_upcall_type_to_string(enum dpif_upcall_type type)
|
|
|
|
|
{
|
|
|
|
|
switch (type) {
|
|
|
|
|
case DPIF_UC_MISS: return "miss";
|
|
|
|
|
case DPIF_UC_ACTION: return "action";
|
|
|
|
|
case DPIF_N_UC_TYPES: default: return "<unknown>";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2012-01-12 17:09:22 -08:00
|
|
|
|
/* Enables or disables receiving packets with dpif_recv() on 'dpif'. Returns 0
|
|
|
|
|
* if successful, otherwise a positive errno value.
|
2011-10-12 16:24:54 -07:00
|
|
|
|
*
|
2012-01-12 17:09:22 -08:00
|
|
|
|
* Turning packet receive off and then back on may change the Netlink PID
|
2011-10-12 16:24:54 -07:00
|
|
|
|
* assignments returned by dpif_port_get_pid(). If the client does this, it
|
|
|
|
|
* must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions
|
|
|
|
|
* using the new PID assignment. */
|
2009-06-17 14:18:10 -07:00
|
|
|
|
int
|
2012-01-12 17:09:22 -08:00
|
|
|
|
dpif_recv_set(struct dpif *dpif, bool enable)
|
2009-06-17 14:18:10 -07:00
|
|
|
|
{
|
2012-01-12 17:09:22 -08:00
|
|
|
|
int error = dpif->dpif_class->recv_set(dpif, enable);
|
|
|
|
|
log_operation(dpif, "recv_set", error);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
return error;
|
2009-06-17 14:18:10 -07:00
|
|
|
|
}
|
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
/* Polls for an upcall from 'dpif'. If successful, stores the upcall into
|
2012-04-06 16:23:28 -07:00
|
|
|
|
* '*upcall', using 'buf' for storage. Should only be called if
|
|
|
|
|
* dpif_recv_set() has been used to enable receiving packets on 'dpif'.
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*
|
2013-12-16 08:14:52 -08:00
|
|
|
|
* 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
|
|
|
|
|
* 'buf', so their memory cannot be freed separately from 'buf'.
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
*
|
2009-06-17 14:35:35 -07:00
|
|
|
|
* Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
* if no upcall is immediately available. */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
int
|
2012-04-06 16:23:28 -07:00
|
|
|
|
dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
{
|
2012-04-06 16:23:28 -07:00
|
|
|
|
int error = dpif->dpif_class->recv(dpif, upcall, buf);
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) {
|
2011-06-08 14:03:47 -07:00
|
|
|
|
struct ds flow;
|
|
|
|
|
char *packet;
|
|
|
|
|
|
2013-12-16 08:14:52 -08:00
|
|
|
|
packet = ofp_packet_to_string(upcall->packet.data,
|
|
|
|
|
upcall->packet.size);
|
2011-06-08 14:03:47 -07:00
|
|
|
|
|
|
|
|
|
ds_init(&flow);
|
|
|
|
|
odp_flow_key_format(upcall->key, upcall->key_len, &flow);
|
|
|
|
|
|
|
|
|
|
VLOG_DBG("%s: %s upcall:\n%s\n%s",
|
|
|
|
|
dpif_name(dpif), dpif_upcall_type_to_string(upcall->type),
|
|
|
|
|
ds_cstr(&flow), packet);
|
|
|
|
|
|
|
|
|
|
ds_destroy(&flow);
|
|
|
|
|
free(packet);
|
2011-11-28 10:35:15 -08:00
|
|
|
|
} else if (error && error != EAGAIN) {
|
|
|
|
|
log_operation(dpif, "recv", error);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Discards all messages that would otherwise be received by dpif_recv() on
|
2011-01-04 17:00:36 -08:00
|
|
|
|
* 'dpif'. */
|
|
|
|
|
void
|
2009-06-17 14:35:35 -07:00
|
|
|
|
dpif_recv_purge(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
COVERAGE_INC(dpif_purge);
|
2011-01-04 17:00:36 -08:00
|
|
|
|
if (dpif->dpif_class->recv_purge) {
|
|
|
|
|
dpif->dpif_class->recv_purge(dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when 'dpif' has a message queued to be
|
|
|
|
|
* received with dpif_recv(). */
|
2009-07-08 13:19:16 -07:00
|
|
|
|
void
|
|
|
|
|
dpif_recv_wait(struct dpif *dpif)
|
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
dpif->dpif_class->recv_wait(dpif);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
}
|
2009-06-16 10:59:43 -07:00
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
|
/* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type'
|
|
|
|
|
* and '*engine_id', respectively. */
|
2009-06-16 10:59:43 -07:00
|
|
|
|
void
|
|
|
|
|
dpif_get_netflow_ids(const struct dpif *dpif,
|
|
|
|
|
uint8_t *engine_type, uint8_t *engine_id)
|
|
|
|
|
{
|
2009-06-17 14:35:35 -07:00
|
|
|
|
*engine_type = dpif->netflow_engine_type;
|
|
|
|
|
*engine_id = dpif->netflow_engine_id;
|
|
|
|
|
}
|
2010-07-20 11:23:21 -07:00
|
|
|
|
|
|
|
|
|
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority
|
2011-11-01 10:13:16 -07:00
|
|
|
|
* value used for setting packet priority.
|
|
|
|
|
* On success, returns 0 and stores the priority into '*priority'.
|
|
|
|
|
* On failure, returns a positive errno value and stores 0 into '*priority'. */
|
2010-07-20 11:23:21 -07:00
|
|
|
|
int
|
|
|
|
|
dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
|
|
|
|
|
uint32_t *priority)
|
|
|
|
|
{
|
|
|
|
|
int error = (dpif->dpif_class->queue_to_priority
|
|
|
|
|
? dpif->dpif_class->queue_to_priority(dpif, queue_id,
|
|
|
|
|
priority)
|
|
|
|
|
: EOPNOTSUPP);
|
|
|
|
|
if (error) {
|
|
|
|
|
*priority = 0;
|
|
|
|
|
}
|
|
|
|
|
log_operation(dpif, "queue_to_priority", error);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
|
|
void
|
2010-01-22 15:14:01 -08:00
|
|
|
|
dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
|
|
|
|
|
const char *name,
|
2009-06-17 14:35:35 -07:00
|
|
|
|
uint8_t netflow_engine_type, uint8_t netflow_engine_id)
|
|
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
|
dpif->dpif_class = dpif_class;
|
2010-01-22 14:37:10 -05:00
|
|
|
|
dpif->base_name = xstrdup(name);
|
2010-02-05 15:58:27 -08:00
|
|
|
|
dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
dpif->netflow_engine_type = netflow_engine_type;
|
|
|
|
|
dpif->netflow_engine_id = netflow_engine_id;
|
|
|
|
|
}
|
2010-02-01 11:36:01 -05:00
|
|
|
|
|
|
|
|
|
/* Undoes the results of initialization.
|
|
|
|
|
*
|
|
|
|
|
* Normally this function only needs to be called from dpif_close().
|
|
|
|
|
* However, it may be called by providers due to an error on opening
|
|
|
|
|
* that occurs after initialization. It this case dpif_close() would
|
|
|
|
|
* never be called. */
|
|
|
|
|
void
|
|
|
|
|
dpif_uninit(struct dpif *dpif, bool close)
|
|
|
|
|
{
|
|
|
|
|
char *base_name = dpif->base_name;
|
|
|
|
|
char *full_name = dpif->full_name;
|
|
|
|
|
|
|
|
|
|
if (close) {
|
2010-02-05 15:58:27 -08:00
|
|
|
|
dpif->dpif_class->close(dpif);
|
2010-02-01 11:36:01 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(base_name);
|
|
|
|
|
free(full_name);
|
|
|
|
|
}
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
log_operation(const struct dpif *dpif, const char *operation, int error)
|
|
|
|
|
{
|
|
|
|
|
if (!error) {
|
|
|
|
|
VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
|
2012-01-12 15:48:19 -08:00
|
|
|
|
} else if (ofperr_is_valid(error)) {
|
2009-06-17 14:35:35 -07:00
|
|
|
|
VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
|
2012-01-12 15:48:19 -08:00
|
|
|
|
dpif_name(dpif), operation, ofperr_get_name(error));
|
2010-10-19 09:55:40 -07:00
|
|
|
|
} else {
|
2012-01-12 15:48:19 -08:00
|
|
|
|
VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
|
2013-06-24 10:54:49 -07:00
|
|
|
|
dpif_name(dpif), operation, ovs_strerror(error));
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum vlog_level
|
|
|
|
|
flow_message_log_level(int error)
|
|
|
|
|
{
|
2013-06-10 18:09:53 -07:00
|
|
|
|
/* If flows arrive in a batch, userspace may push down multiple
|
|
|
|
|
* unique flow definitions that overlap when wildcards are applied.
|
|
|
|
|
* Kernels that support flow wildcarding will reject these flows as
|
|
|
|
|
* duplicates (EEXIST), so lower the log level to debug for these
|
|
|
|
|
* types of messages. */
|
|
|
|
|
return (error && error != EEXIST) ? VLL_WARN : VLL_DBG;
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
should_log_flow_message(int error)
|
|
|
|
|
{
|
|
|
|
|
return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
|
|
|
|
|
error ? &error_rl : &dpmsg_rl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
log_flow_message(const struct dpif *dpif, int error, const char *operation,
|
2011-01-23 18:44:44 -08:00
|
|
|
|
const struct nlattr *key, size_t key_len,
|
2013-06-20 13:43:56 -07:00
|
|
|
|
const struct nlattr *mask, size_t mask_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
const struct dpif_flow_stats *stats,
|
2010-12-11 22:51:31 -08:00
|
|
|
|
const struct nlattr *actions, size_t actions_len)
|
2009-06-17 14:35:35 -07:00
|
|
|
|
{
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
ds_put_format(&ds, "%s: ", dpif_name(dpif));
|
|
|
|
|
if (error) {
|
|
|
|
|
ds_put_cstr(&ds, "failed to ");
|
|
|
|
|
}
|
|
|
|
|
ds_put_format(&ds, "%s ", operation);
|
|
|
|
|
if (error) {
|
2013-06-24 10:54:49 -07:00
|
|
|
|
ds_put_format(&ds, "(%s) ", ovs_strerror(error));
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
2013-09-23 22:58:46 -07:00
|
|
|
|
odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
if (stats) {
|
|
|
|
|
ds_put_cstr(&ds, ", ");
|
2011-01-26 07:11:50 -08:00
|
|
|
|
dpif_flow_stats_format(stats, &ds);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
2010-12-10 10:40:58 -08:00
|
|
|
|
if (actions || actions_len) {
|
2009-06-17 14:35:35 -07:00
|
|
|
|
ds_put_cstr(&ds, ", actions:");
|
2010-12-10 10:40:58 -08:00
|
|
|
|
format_odp_actions(&ds, actions, actions_len);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
}
|
|
|
|
|
vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
}
|
2011-12-26 14:39:03 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
log_flow_put_message(struct dpif *dpif, const struct dpif_flow_put *put,
|
|
|
|
|
int error)
|
|
|
|
|
{
|
|
|
|
|
if (should_log_flow_message(error)) {
|
|
|
|
|
struct ds s;
|
|
|
|
|
|
|
|
|
|
ds_init(&s);
|
|
|
|
|
ds_put_cstr(&s, "put");
|
|
|
|
|
if (put->flags & DPIF_FP_CREATE) {
|
|
|
|
|
ds_put_cstr(&s, "[create]");
|
|
|
|
|
}
|
|
|
|
|
if (put->flags & DPIF_FP_MODIFY) {
|
|
|
|
|
ds_put_cstr(&s, "[modify]");
|
|
|
|
|
}
|
|
|
|
|
if (put->flags & DPIF_FP_ZERO_STATS) {
|
|
|
|
|
ds_put_cstr(&s, "[zero]");
|
|
|
|
|
}
|
|
|
|
|
log_flow_message(dpif, error, ds_cstr(&s),
|
2013-06-20 13:43:56 -07:00
|
|
|
|
put->key, put->key_len, put->mask, put->mask_len,
|
|
|
|
|
put->stats, put->actions, put->actions_len);
|
2011-12-26 14:39:03 -08:00
|
|
|
|
ds_destroy(&s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2012-04-17 21:52:10 -07:00
|
|
|
|
static void
|
|
|
|
|
log_flow_del_message(struct dpif *dpif, const struct dpif_flow_del *del,
|
|
|
|
|
int error)
|
|
|
|
|
{
|
|
|
|
|
if (should_log_flow_message(error)) {
|
|
|
|
|
log_flow_message(dpif, error, "flow_del", del->key, del->key_len,
|
2013-06-20 13:43:56 -07:00
|
|
|
|
NULL, 0, !error ? del->stats : NULL, NULL, 0);
|
2012-04-17 21:52:10 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-12-26 14:39:03 -08:00
|
|
|
|
static void
|
|
|
|
|
log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
|
|
|
|
|
int error)
|
|
|
|
|
{
|
|
|
|
|
if (!(error ? VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))) {
|
|
|
|
|
struct ds ds = DS_EMPTY_INITIALIZER;
|
|
|
|
|
char *packet;
|
|
|
|
|
|
|
|
|
|
packet = ofp_packet_to_string(execute->packet->data,
|
|
|
|
|
execute->packet->size);
|
|
|
|
|
ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
|
|
|
|
|
format_odp_actions(&ds, execute->actions, execute->actions_len);
|
|
|
|
|
if (error) {
|
2013-06-24 10:54:49 -07:00
|
|
|
|
ds_put_format(&ds, " failed (%s)", ovs_strerror(error));
|
2011-12-26 14:39:03 -08:00
|
|
|
|
}
|
|
|
|
|
ds_put_format(&ds, " on packet %s", packet);
|
|
|
|
|
vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds));
|
|
|
|
|
ds_destroy(&ds);
|
|
|
|
|
free(packet);
|
|
|
|
|
}
|
|
|
|
|
}
|