2009-06-17 14:35:35 -07:00
|
|
|
/*
|
2012-05-02 15:21:36 -07:00
|
|
|
* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DPIF_PROVIDER_H
|
|
|
|
#define DPIF_PROVIDER_H 1
|
|
|
|
|
|
|
|
/* Provider interface to dpifs, which provide an interface to an Open vSwitch
|
2010-06-07 14:05:56 -07:00
|
|
|
* datapath. A datapath is a collection of physical or virtual ports that are
|
|
|
|
* exposed over OpenFlow as a single switch. Datapaths and the collections of
|
|
|
|
* ports that they contain may be fixed or dynamic. */
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
#include <assert.h>
|
2010-04-27 09:40:46 -07:00
|
|
|
#include "openflow/openflow.h"
|
2009-06-17 14:35:35 -07:00
|
|
|
#include "dpif.h"
|
2010-04-27 09:40:46 -07:00
|
|
|
#include "util.h"
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* Open vSwitch datapath interface.
 *
 * This structure should be treated as opaque by dpif implementations. */
struct dpif {
    /* Provider class of this dpif; dpif_assert_class() compares it against
     * the class that an implementation expects. */
    const struct dpif_class *dpif_class;

    /* Name strings for this dpif.  NOTE(review): how 'base_name' and
     * 'full_name' differ, and who owns the storage, is not visible in this
     * header -- confirm against the dpif_init()/dpif_uninit() definitions. */
    char *base_name;
    char *full_name;

    /* NetFlow engine identifiers.  NOTE(review): presumably copied from the
     * like-named dpif_init() arguments -- confirm against its definition. */
    uint8_t netflow_engine_type;
    uint8_t netflow_engine_id;
};
|
|
|
|
|
|
|
|
/* Set-up and tear-down helpers for the common 'struct dpif' part.  Only the
 * prototypes appear in this header; the definitions live elsewhere.
 *
 * NOTE(review): dpif_init() presumably records the class, 'name', and the
 * NetFlow engine parameters in 'dpif', and the 'close' flag of dpif_uninit()
 * presumably selects whether the provider's 'close' function is invoked as
 * well -- confirm against the definitions. */
void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
               uint8_t netflow_engine_type, uint8_t netflow_engine_id);
void dpif_uninit(struct dpif *dpif, bool close);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
static inline void dpif_assert_class(const struct dpif *dpif,
|
2010-01-22 15:14:01 -08:00
|
|
|
const struct dpif_class *dpif_class)
|
2009-06-17 14:35:35 -07:00
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
assert(dpif->dpif_class == dpif_class);
|
2009-06-17 14:35:35 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Datapath interface class structure, to be defined by each implementation of
|
2009-11-30 23:20:57 -08:00
|
|
|
* a datapath interface.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
|
|
|
* These functions return 0 if successful or a positive errno value on failure,
|
|
|
|
* except where otherwise noted.
|
|
|
|
*
|
|
|
|
* These functions are expected to execute synchronously, that is, to block as
|
|
|
|
* necessary to obtain a result. Thus, they may not return EAGAIN or
|
|
|
|
* EWOULDBLOCK or EINPROGRESS. We may relax this requirement in the future if
|
|
|
|
* and when we encounter performance problems. */
|
|
|
|
struct dpif_class {
|
2010-01-22 14:37:10 -05:00
|
|
|
/* Type of dpif in this class, e.g. "system", "netdev", etc.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2010-01-22 14:37:10 -05:00
|
|
|
* One of the providers should supply a "system" type, since this is
|
|
|
|
* the type assumed if no type is specified when opening a dpif. */
|
|
|
|
const char *type;
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2009-07-06 11:06:36 -07:00
|
|
|
/* Enumerates the names of all known created datapaths, if possible, into
|
|
|
|
* 'all_dps'. The caller has already initialized 'all_dps' and other dpif
|
|
|
|
* classes might already have added names to it.
|
|
|
|
*
|
|
|
|
* This is used by the vswitch at startup, so that it can delete any
|
|
|
|
* datapaths that are not configured.
|
|
|
|
*
|
|
|
|
* Some kinds of datapaths might not be practically enumerable, in which
|
|
|
|
* case this function may be a null pointer. */
|
2011-03-25 13:00:13 -07:00
|
|
|
int (*enumerate)(struct sset *all_dps);
|
2009-07-06 11:06:36 -07:00
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
/* Attempts to open an existing dpif called 'name', if 'create' is false,
|
|
|
|
* or to open an existing dpif or create a new one, if 'create' is true.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2010-11-18 10:06:41 -08:00
|
|
|
* 'dpif_class' is the class of dpif to open.
|
|
|
|
*
|
|
|
|
* If successful, stores a pointer to the new dpif in '*dpifp', which must
|
|
|
|
* have class 'dpif_class'. On failure there are no requirements on what
|
|
|
|
* is stored in '*dpifp'. */
|
|
|
|
int (*open)(const struct dpif_class *dpif_class,
|
|
|
|
const char *name, bool create, struct dpif **dpifp);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Closes 'dpif' and frees associated memory. */
|
|
|
|
void (*close)(struct dpif *dpif);
|
|
|
|
|
|
|
|
/* Attempts to destroy the dpif underlying 'dpif'.
|
|
|
|
*
|
|
|
|
* If successful, 'dpif' will not be used again except as an argument for
|
|
|
|
* the 'close' member function. */
|
2010-01-22 15:14:01 -08:00
|
|
|
int (*destroy)(struct dpif *dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2011-05-06 15:04:29 -07:00
|
|
|
/* Performs periodic work needed by 'dpif', if any is necessary. */
|
|
|
|
void (*run)(struct dpif *dpif);
|
|
|
|
|
|
|
|
/* Arranges for poll_block() to wake up if the "run" member function needs
|
|
|
|
* to be called for 'dpif'. */
|
|
|
|
void (*wait)(struct dpif *dpif);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* Retrieves statistics for 'dpif' into 'stats'. */
|
2011-10-05 11:18:13 -07:00
|
|
|
int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2012-07-27 23:58:24 -07:00
|
|
|
/* Adds 'netdev' as a new port in 'dpif'. If '*port_no' is not
|
|
|
|
* UINT16_MAX, attempts to use that as the port's port number.
|
|
|
|
*
|
|
|
|
* If port is successfully added, sets '*port_no' to the new port's
|
|
|
|
* port number. Returns EBUSY if caller attempted to choose a port
|
|
|
|
* number, and it was in use. */
|
2010-12-03 14:41:38 -08:00
|
|
|
int (*port_add)(struct dpif *dpif, struct netdev *netdev,
|
2009-06-17 14:35:35 -07:00
|
|
|
uint16_t *port_no);
|
|
|
|
|
|
|
|
/* Removes port numbered 'port_no' from 'dpif'. */
|
|
|
|
int (*port_del)(struct dpif *dpif, uint16_t port_no);
|
|
|
|
|
|
|
|
/* Queries 'dpif' for a port with the given 'port_no' or 'devname'. Stores
|
2011-01-23 18:48:02 -08:00
|
|
|
* information about the port into '*port' if successful.
|
|
|
|
*
|
|
|
|
* The caller takes ownership of data in 'port' and must free it with
|
|
|
|
* dpif_port_destroy() when it is no longer needed. */
|
2009-06-17 14:35:35 -07:00
|
|
|
int (*port_query_by_number)(const struct dpif *dpif, uint16_t port_no,
|
2011-01-23 18:48:02 -08:00
|
|
|
struct dpif_port *port);
|
2009-06-17 14:35:35 -07:00
|
|
|
int (*port_query_by_name)(const struct dpif *dpif, const char *devname,
|
2011-01-23 18:48:02 -08:00
|
|
|
struct dpif_port *port);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2011-01-26 09:24:59 -08:00
|
|
|
/* Returns one greater than the largest port number accepted in flow
|
|
|
|
* actions. */
|
|
|
|
int (*get_max_ports)(const struct dpif *dpif);
|
|
|
|
|
2011-10-12 16:24:54 -07:00
|
|
|
/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE
|
|
|
|
* actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in
|
|
|
|
* flows whose packets arrived on port 'port_no'.
|
|
|
|
*
|
2012-05-05 11:07:42 -07:00
|
|
|
* A 'port_no' of UINT16_MAX should be treated as a special case. The
|
|
|
|
* implementation should return a reserved PID, not allocated to any port,
|
|
|
|
* that the client may use for special purposes.
|
|
|
|
*
|
2011-10-12 16:24:54 -07:00
|
|
|
* The return value only needs to be meaningful when DPIF_UC_ACTION has
|
|
|
|
* been enabled in the 'dpif''s listen mask, and it is allowed to change
|
|
|
|
* when DPIF_UC_ACTION is disabled and then re-enabled.
|
|
|
|
*
|
|
|
|
* A dpif provider that doesn't have meaningful Netlink PIDs can use NULL
|
|
|
|
* for this function. This is equivalent to always returning 0. */
|
|
|
|
uint32_t (*port_get_pid)(const struct dpif *dpif, uint16_t port_no);
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
/* Attempts to begin dumping the ports in a dpif. On success, returns 0
|
|
|
|
* and initializes '*statep' with any data needed for iteration. On
|
|
|
|
* failure, returns a positive errno value. */
|
|
|
|
int (*port_dump_start)(const struct dpif *dpif, void **statep);
|
|
|
|
|
|
|
|
/* Attempts to retrieve another port from 'dpif' for 'state', which was
|
|
|
|
* initialized by a successful call to the 'port_dump_start' function for
|
2011-01-23 18:48:02 -08:00
|
|
|
* 'dpif'. On success, stores a new dpif_port into 'port' and returns 0.
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
* Returns EOF if the end of the port table has been reached, or a positive
|
|
|
|
* errno value on error. This function will not be called again once it
|
|
|
|
* returns nonzero for a given iteration (but the 'port_dump_done'
|
2011-01-23 18:48:02 -08:00
|
|
|
* function will be called afterward).
|
|
|
|
*
|
|
|
|
* The dpif provider retains ownership of the data stored in 'port'. It
|
|
|
|
* must remain valid until at least the next call to 'port_dump_next' or
|
|
|
|
* 'port_dump_done' for 'state'. */
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
int (*port_dump_next)(const struct dpif *dpif, void *state,
|
2011-01-23 18:48:02 -08:00
|
|
|
struct dpif_port *port);
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
|
|
|
/* Releases resources from 'dpif' for 'state', which was initialized by a
|
|
|
|
* successful call to the 'port_dump_start' function for 'dpif'. */
|
|
|
|
int (*port_dump_done)(const struct dpif *dpif, void *state);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2009-06-24 10:24:09 -07:00
|
|
|
/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
|
|
|
|
* 'dpif' has changed, then this function should do one of the
|
|
|
|
* following:
|
|
|
|
*
|
|
|
|
* - Preferably: store the name of the device that was added to or deleted
|
|
|
|
* from 'dpif' in '*devnamep' and return 0. The caller is responsible
|
|
|
|
* for freeing '*devnamep' (with free()) when it no longer needs it.
|
|
|
|
*
|
|
|
|
* - Alternatively: return ENOBUFS, without indicating the device that was
|
|
|
|
* added or deleted.
|
|
|
|
*
|
|
|
|
* Occasional 'false positives', in which the function returns 0 while
|
|
|
|
* indicating a device that was not actually added or deleted or returns
|
|
|
|
* ENOBUFS without any change, are acceptable.
|
|
|
|
*
|
|
|
|
* If the set of ports in 'dpif' has not changed, returns EAGAIN. May also
|
|
|
|
* return other positive errno values to indicate that something has gone
|
|
|
|
* wrong. */
|
|
|
|
int (*port_poll)(const struct dpif *dpif, char **devnamep);
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when 'port_poll' will return a
|
|
|
|
* value other than EAGAIN. */
|
|
|
|
void (*port_poll_wait)(const struct dpif *dpif);
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
/* Queries 'dpif' for a flow entry. The flow is specified by the Netlink
|
2011-08-18 10:35:40 -07:00
|
|
|
* attributes with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at
|
2011-01-26 07:03:39 -08:00
|
|
|
* 'key'.
|
2011-01-17 14:40:58 -08:00
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
* Returns 0 if successful. If no flow matches, returns ENOENT. On other
|
|
|
|
* failure, returns a positive errno value.
|
2011-01-17 14:40:58 -08:00
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
* If 'actionsp' is nonnull, then on success '*actionsp' must be set to an
|
|
|
|
* ofpbuf owned by the caller that contains the Netlink attributes for the
|
|
|
|
* flow's actions. The caller must free the ofpbuf (with ofpbuf_delete())
|
|
|
|
* when it is no longer needed.
|
|
|
|
*
|
|
|
|
* If 'stats' is nonnull, then on success it must be updated with the
|
|
|
|
* flow's statistics. */
|
2011-01-17 14:43:30 -08:00
|
|
|
int (*flow_get)(const struct dpif *dpif,
|
2011-01-26 07:03:39 -08:00
|
|
|
const struct nlattr *key, size_t key_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
struct ofpbuf **actionsp, struct dpif_flow_stats *stats);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
|
|
|
/* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink
|
2011-12-26 14:39:03 -08:00
|
|
|
* attributes with types OVS_KEY_ATTR_* in the 'put->key_len' bytes
|
|
|
|
* starting at 'put->key'. The associated actions are specified by the
|
|
|
|
* Netlink attributes with types OVS_ACTION_ATTR_* in the
|
|
|
|
* 'put->actions_len' bytes starting at 'put->actions'.
|
2011-01-26 07:03:39 -08:00
|
|
|
*
|
|
|
|
* - If the flow's key does not exist in 'dpif', then the flow will be
|
2011-12-26 14:39:03 -08:00
|
|
|
* added if 'put->flags' includes DPIF_FP_CREATE. Otherwise the
|
|
|
|
* operation will fail with ENOENT.
|
2011-01-26 07:03:39 -08:00
|
|
|
*
|
2011-12-26 14:39:03 -08:00
|
|
|
* If the operation succeeds, then 'put->stats', if nonnull, must be
|
|
|
|
* zeroed.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
* - If the flow's key does exist in 'dpif', then the flow's actions will
|
2011-12-26 14:39:03 -08:00
|
|
|
* be updated if 'put->flags' includes DPIF_FP_MODIFY. Otherwise the
|
2011-01-26 07:12:24 -08:00
|
|
|
* operation will fail with EEXIST. If the flow's actions are updated,
|
2011-12-26 14:39:03 -08:00
|
|
|
* then its statistics will be zeroed if 'put->flags' includes
|
2011-01-26 07:12:24 -08:00
|
|
|
* DPIF_FP_ZERO_STATS, and left as-is otherwise.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2011-12-26 14:39:03 -08:00
|
|
|
* If the operation succeeds, then 'put->stats', if nonnull, must be set
|
|
|
|
* to the flow's statistics before the update.
|
2009-06-17 14:35:35 -07:00
|
|
|
*/
|
2011-12-26 14:39:03 -08:00
|
|
|
int (*flow_put)(struct dpif *dpif, const struct dpif_flow_put *put);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
|
|
|
/* Deletes a flow from 'dpif' and returns 0, or returns ENOENT if 'dpif'
|
|
|
|
* does not contain such a flow. The flow is specified by the Netlink
|
2012-04-17 21:52:10 -07:00
|
|
|
* attributes with types OVS_KEY_ATTR_* in the 'del->key_len' bytes
|
|
|
|
* starting at 'del->key'.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2012-04-17 21:52:10 -07:00
|
|
|
* If the operation succeeds, then 'del->stats', if nonnull, must be set to
|
|
|
|
* the flow's statistics before its deletion. */
|
|
|
|
int (*flow_del)(struct dpif *dpif, const struct dpif_flow_del *del);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Deletes all flows from 'dpif' and clears all of its queues of received
|
|
|
|
* packets. */
|
|
|
|
int (*flow_flush)(struct dpif *dpif);
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
/* Attempts to begin dumping the flows in a dpif. On success, returns 0
|
|
|
|
* and initializes '*statep' with any data needed for iteration. On
|
|
|
|
* failure, returns a positive errno value. */
|
|
|
|
int (*flow_dump_start)(const struct dpif *dpif, void **statep);
|
|
|
|
|
|
|
|
/* Attempts to retrieve another flow from 'dpif' for 'state', which was
|
|
|
|
* initialized by a successful call to the 'flow_dump_start' function for
|
2011-01-26 07:03:39 -08:00
|
|
|
* 'dpif'. On success, updates the output parameters as described below
|
|
|
|
* and returns 0. Returns EOF if the end of the flow table has been
|
|
|
|
* reached, or a positive errno value on error. This function will not be
|
|
|
|
* called again once it returns nonzero within a given iteration (but the
|
|
|
|
* 'flow_dump_done' function will be called afterward).
|
|
|
|
*
|
|
|
|
* On success, if 'key' and 'key_len' are nonnull then '*key' and
|
2011-08-18 10:35:40 -07:00
|
|
|
* '*key_len' must be set to Netlink attributes with types OVS_KEY_ATTR_*
|
2011-01-26 07:03:39 -08:00
|
|
|
* representing the dumped flow's key. If 'actions' and 'actions_len' are
|
2011-01-23 21:56:00 -08:00
|
|
|
* nonnull then they should be set to Netlink attributes with types
|
2011-08-18 10:35:40 -07:00
|
|
|
* OVS_ACTION_ATTR_* representing the dumped flow's actions. If 'stats'
|
2011-01-23 21:56:00 -08:00
|
|
|
* is nonnull then it should be set to the dumped flow's statistics.
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
*
|
2011-01-26 07:03:39 -08:00
|
|
|
* All of the returned data is owned by 'dpif', not by the caller, and the
|
|
|
|
* caller must not modify or free it. 'dpif' must guarantee that it
|
|
|
|
* remains accessible and unchanging until at least the next call to
|
|
|
|
* 'flow_dump_next' or 'flow_dump_done' for 'state'. */
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
int (*flow_dump_next)(const struct dpif *dpif, void *state,
|
2011-01-26 07:03:39 -08:00
|
|
|
const struct nlattr **key, size_t *key_len,
|
|
|
|
const struct nlattr **actions, size_t *actions_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
const struct dpif_flow_stats **stats);
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
|
|
|
/* Releases resources from 'dpif' for 'state', which was initialized by a
|
|
|
|
* successful call to the 'flow_dump_start' function for 'dpif'. */
|
|
|
|
int (*flow_dump_done)(const struct dpif *dpif, void *state);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2011-12-26 14:39:03 -08:00
|
|
|
/* Performs the 'execute->actions_len' bytes of actions in
|
|
|
|
* 'execute->actions' on the Ethernet frame specified in 'execute->packet'
|
|
|
|
* taken from the flow specified in the 'execute->key_len' bytes of
|
|
|
|
* 'execute->key'. ('execute->key' is mostly redundant with
|
|
|
|
* 'execute->packet', but it contains some metadata that cannot be
|
2012-09-13 20:11:08 -07:00
|
|
|
* recovered from 'execute->packet', such as tunnel and in_port.) */
|
2011-12-26 14:39:03 -08:00
|
|
|
int (*execute)(struct dpif *dpif, const struct dpif_execute *execute);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2011-09-27 15:08:50 -07:00
|
|
|
/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order
|
|
|
|
* in which they are specified, placing each operation's results in the
|
|
|
|
* "output" members documented in comments.
|
|
|
|
*
|
|
|
|
* This function is optional. It is only worthwhile to implement it if
|
|
|
|
* 'dpif' can perform operations in batch faster than individually. */
|
2011-12-26 14:17:55 -08:00
|
|
|
void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops);
|
2011-09-27 15:08:50 -07:00
|
|
|
|
2012-01-12 17:09:22 -08:00
|
|
|
/* Enables or disables receiving packets with dpif_recv() for 'dpif'.
|
|
|
|
* Turning packet receive off and then back on is allowed to change Netlink
|
2011-10-12 16:24:54 -07:00
|
|
|
* PID assignments (see ->port_get_pid()). The client is responsible for
|
|
|
|
* updating flows as necessary if it does this. */
|
2012-01-12 17:09:22 -08:00
|
|
|
int (*recv_set)(struct dpif *dpif, bool enable);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-07-20 11:23:21 -07:00
|
|
|
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
|
2011-11-01 10:13:16 -07:00
|
|
|
* priority value used for setting packet priority and stores it in
* '*priority'. */
|
2010-07-20 11:23:21 -07:00
|
|
|
int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id,
|
|
|
|
uint32_t *priority);
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
/* Polls for an upcall from 'dpif'. If successful, stores the upcall into
|
2012-04-06 16:23:28 -07:00
|
|
|
* '*upcall', using 'buf' for storage. Should only be called if 'recv_set'
|
|
|
|
* has been used to enable receiving packets from 'dpif'.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2012-04-06 16:23:28 -07:00
|
|
|
* The implementation should point 'upcall->packet' and 'upcall->key' into
|
|
|
|
* data in the caller-provided 'buf'. If necessary to make room, the
|
|
|
|
* implementation may expand the data in 'buf'. (This is hardly a great
|
|
|
|
* way to do things but it works out OK for the dpif providers that exist
|
|
|
|
* so far.)
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
*
|
|
|
|
* This function must not block. If no upcall is pending when it is
|
|
|
|
* called, it should return EAGAIN without blocking. */
|
2012-04-06 16:23:28 -07:00
|
|
|
int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall,
|
|
|
|
struct ofpbuf *buf);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when 'dpif' has a message queued
|
|
|
|
* to be received with the recv member function. */
|
|
|
|
void (*recv_wait)(struct dpif *dpif);
|
2011-01-04 17:00:36 -08:00
|
|
|
|
|
|
|
/* Throws away any queued upcalls that 'dpif' currently has ready to
|
|
|
|
* return. */
|
|
|
|
void (*recv_purge)(struct dpif *dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* The 'dpif_linux_class' provider class instance.  This is only a
 * declaration ('extern'); the object itself is defined in another source
 * file.  NOTE(review): presumably the provider for the Linux kernel
 * datapath, judging by the name -- confirm against its definition. */
extern const struct dpif_class dpif_linux_class;
|
2009-06-19 14:09:39 -07:00
|
|
|
/* The 'dpif_netdev_class' provider class instance.  This is only a
 * declaration ('extern'); the object itself is defined in another source
 * file.  NOTE(review): presumably a netdev-based provider, judging by the
 * name -- confirm against its definition. */
extern const struct dpif_class dpif_netdev_class;
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
#endif /* dpif-provider.h */
|