2009-06-17 14:35:35 -07:00
|
|
|
/*
|
2010-01-22 14:37:10 -05:00
|
|
|
* Copyright (c) 2009, 2010 Nicira Networks.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DPIF_PROVIDER_H
|
|
|
|
#define DPIF_PROVIDER_H 1
|
|
|
|
|
|
|
|
/* Provider interface to dpifs, which provide an interface to an Open vSwitch
|
2010-06-07 14:05:56 -07:00
|
|
|
* datapath. A datapath is a collection of physical or virtual ports that are
|
|
|
|
* exposed over OpenFlow as a single switch. Datapaths and the collections of
|
|
|
|
* ports that they contain may be fixed or dynamic. */
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
#include <assert.h>
|
2010-04-27 09:40:46 -07:00
|
|
|
#include "openflow/openflow.h"
|
2009-06-17 14:35:35 -07:00
|
|
|
#include "dpif.h"
|
2010-04-27 09:40:46 -07:00
|
|
|
#include "util.h"
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* Open vSwitch datapath interface.
|
|
|
|
*
|
|
|
|
* This structure should be treated as opaque by dpif implementations. */
|
|
|
|
struct dpif {
    /* Class that implements this dpif. dpif_assert_class() compares a
     * caller-supplied class pointer against this member. */
    const struct dpif_class *dpif_class;

    /* Names of this dpif.
     *
     * NOTE(review): presumably both are derived from the 'name' argument of
     * dpif_init(); how they differ and who frees them is not visible in this
     * header -- confirm against the definition of dpif_init(). */
    char *base_name;
    char *full_name;

    /* NetFlow engine type and ID associated with this dpif.
     *
     * NOTE(review): presumably the values passed to dpif_init() under the
     * same names -- confirm. */
    uint8_t netflow_engine_type;
    uint8_t netflow_engine_id;
};
|
|
|
|
|
|
|
|
/* Helpers for dpif implementations to set up and tear down the generic
 * 'struct dpif' embedded in, or pointed to by, their own state.
 *
 * NOTE(review): both functions are defined outside this header. Judging by
 * the parameter names, dpif_init() fills in the members of 'dpif' from its
 * arguments, and the 'close' flag of dpif_uninit() selects whether the
 * provider's close callback is invoked as well -- confirm against the
 * definitions before relying on either. */
void dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
               const char *name,
               uint8_t netflow_engine_type, uint8_t netflow_engine_id);
void dpif_uninit(struct dpif *dpif, bool close);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
static inline void dpif_assert_class(const struct dpif *dpif,
|
2010-01-22 15:14:01 -08:00
|
|
|
const struct dpif_class *dpif_class)
|
2009-06-17 14:35:35 -07:00
|
|
|
{
|
2010-01-22 15:14:01 -08:00
|
|
|
assert(dpif->dpif_class == dpif_class);
|
2009-06-17 14:35:35 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Datapath interface class structure, to be defined by each implementation of
|
2009-11-30 23:20:57 -08:00
|
|
|
* a datapath interface.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
|
|
|
* These functions return 0 if successful or a positive errno value on failure,
|
|
|
|
* except where otherwise noted.
|
|
|
|
*
|
|
|
|
* These functions are expected to execute synchronously, that is, to block as
|
|
|
|
* necessary to obtain a result. Thus, they may not return EAGAIN or
|
|
|
|
* EWOULDBLOCK or EINPROGRESS. We may relax this requirement in the future if
|
|
|
|
* and when we encounter performance problems. */
|
|
|
|
struct dpif_class {
|
2010-01-22 14:37:10 -05:00
|
|
|
/* Type of dpif in this class, e.g. "system", "netdev", etc.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2010-01-22 14:37:10 -05:00
|
|
|
* One of the providers should supply a "system" type, since this is
|
|
|
|
* the type assumed if no type is specified when opening a dpif. */
|
|
|
|
const char *type;
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2009-06-19 14:09:09 -07:00
|
|
|
/* Performs periodic work needed by dpifs of this class, if any is
|
|
|
|
* necessary. */
|
|
|
|
void (*run)(void);
|
|
|
|
|
|
|
|
/* Arranges for poll_block() to wake up if the "run" member function needs
|
|
|
|
* to be called. */
|
|
|
|
void (*wait)(void);
|
|
|
|
|
2009-07-06 11:06:36 -07:00
|
|
|
/* Enumerates the names of all known created datapaths, if possible, into
|
|
|
|
* 'all_dps'. The caller has already initialized 'all_dps' and other dpif
|
|
|
|
* classes might already have added names to it.
|
|
|
|
*
|
|
|
|
* This is used by the vswitch at startup, so that it can delete any
|
|
|
|
* datapaths that are not configured.
|
|
|
|
*
|
|
|
|
* Some kinds of datapaths might not be practically enumerable, in which
|
|
|
|
* case this function may be a null pointer. */
|
|
|
|
int (*enumerate)(struct svec *all_dps);
|
|
|
|
|
2010-01-22 14:37:10 -05:00
|
|
|
/* Attempts to open an existing dpif called 'name', if 'create' is false,
|
|
|
|
* or to open an existing dpif or create a new one, if 'create' is true.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
2010-11-18 10:06:41 -08:00
|
|
|
* 'dpif_class' is the class of dpif to open.
|
|
|
|
*
|
|
|
|
* If successful, stores a pointer to the new dpif in '*dpifp', which must
|
|
|
|
* have class 'dpif_class'. On failure there are no requirements on what
|
|
|
|
* is stored in '*dpifp'. */
|
|
|
|
int (*open)(const struct dpif_class *dpif_class,
|
|
|
|
const char *name, bool create, struct dpif **dpifp);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Closes 'dpif' and frees associated memory. */
|
|
|
|
void (*close)(struct dpif *dpif);
|
|
|
|
|
2009-07-06 11:06:36 -07:00
|
|
|
/* Enumerates all names that may be used to open 'dpif' into 'all_names'.
|
|
|
|
* The Linux datapath, for example, supports opening a datapath both by
|
|
|
|
* number, e.g. "dp0", and by the name of the datapath's local port. For
|
|
|
|
* some datapaths, this might be an infinite set (e.g. in a file name,
|
|
|
|
* slashes may be duplicated any number of times), in which case only the
|
|
|
|
* names most likely to be used should be enumerated.
|
|
|
|
*
|
|
|
|
* The caller has already initialized 'all_names' and might already have
|
|
|
|
* added some names to it. This function should not disturb any existing
|
|
|
|
* names in 'all_names'.
|
|
|
|
*
|
|
|
|
* If a datapath class does not support multiple names for a datapath, this
|
|
|
|
* function may be a null pointer.
|
|
|
|
*
|
|
|
|
* This is used by the vswitch at startup, */
|
|
|
|
int (*get_all_names)(const struct dpif *dpif, struct svec *all_names);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* Attempts to destroy the dpif underlying 'dpif'.
|
|
|
|
*
|
|
|
|
* If successful, 'dpif' will not be used again except as an argument for
|
|
|
|
* the 'close' member function. */
|
2010-01-22 15:14:01 -08:00
|
|
|
int (*destroy)(struct dpif *dpif);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Retrieves statistics for 'dpif' into 'stats'. */
|
|
|
|
int (*get_stats)(const struct dpif *dpif, struct odp_stats *stats);
|
|
|
|
|
|
|
|
/* Retrieves 'dpif''s current treatment of IP fragments into '*drop_frags':
|
|
|
|
* true indicates that fragments are dropped, false indicates that
|
|
|
|
* fragments are treated in the same way as other IP packets (except that
|
|
|
|
* the L4 header cannot be read). */
|
|
|
|
int (*get_drop_frags)(const struct dpif *dpif, bool *drop_frags);
|
|
|
|
|
|
|
|
/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose
|
|
|
|
* meaning is the same as for the get_drop_frags member function. */
|
|
|
|
int (*set_drop_frags)(struct dpif *dpif, bool drop_frags);
|
|
|
|
|
2010-12-03 14:41:38 -08:00
|
|
|
/* Adds 'netdev' as a new port in 'dpif'. If successful, sets '*port_no'
|
2009-06-17 14:35:35 -07:00
|
|
|
* to the new port's port number. */
|
2010-12-03 14:41:38 -08:00
|
|
|
int (*port_add)(struct dpif *dpif, struct netdev *netdev,
|
2009-06-17 14:35:35 -07:00
|
|
|
uint16_t *port_no);
|
|
|
|
|
|
|
|
/* Removes port numbered 'port_no' from 'dpif'. */
|
|
|
|
int (*port_del)(struct dpif *dpif, uint16_t port_no);
|
|
|
|
|
|
|
|
/* Queries 'dpif' for a port with the given 'port_no' or 'devname'. Stores
|
|
|
|
* information about the port into '*port' if successful. */
|
|
|
|
int (*port_query_by_number)(const struct dpif *dpif, uint16_t port_no,
|
|
|
|
struct odp_port *port);
|
|
|
|
int (*port_query_by_name)(const struct dpif *dpif, const char *devname,
|
|
|
|
struct odp_port *port);
|
|
|
|
|
|
|
|
/* Stores in 'ports' information about up to 'n' ports attached to 'dpif',
|
|
|
|
* in no particular order. Returns the number of ports attached to 'dpif'
|
|
|
|
* (not the number stored), if successful, otherwise a negative errno
|
|
|
|
* value. */
|
|
|
|
int (*port_list)(const struct dpif *dpif, struct odp_port *ports, int n);
|
|
|
|
|
2009-06-24 10:24:09 -07:00
|
|
|
/* Polls for changes in the set of ports in 'dpif'. If the set of ports in
|
|
|
|
* 'dpif' has changed, then this function should do one of the
|
|
|
|
* following:
|
|
|
|
*
|
|
|
|
* - Preferably: store the name of the device that was added to or deleted
|
|
|
|
* from 'dpif' in '*devnamep' and return 0. The caller is responsible
|
|
|
|
* for freeing '*devnamep' (with free()) when it no longer needs it.
|
|
|
|
*
|
|
|
|
* - Alternatively: return ENOBUFS, without indicating the device that was
|
|
|
|
* added or deleted.
|
|
|
|
*
|
|
|
|
* Occasional 'false positives', in which the function returns 0 while
|
|
|
|
* indicating a device that was not actually added or deleted or returns
|
|
|
|
* ENOBUFS without any change, are acceptable.
|
|
|
|
*
|
|
|
|
* If the set of ports in 'dpif' has not changed, returns EAGAIN. May also
|
|
|
|
* return other positive errno values to indicate that something has gone
|
|
|
|
* wrong. */
|
|
|
|
int (*port_poll)(const struct dpif *dpif, char **devnamep);
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when 'port_poll' will return a
|
|
|
|
* value other than EAGAIN. */
|
|
|
|
void (*port_poll_wait)(const struct dpif *dpif);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* For each flow 'flow' in the 'n' flows in 'flows':
|
|
|
|
*
|
|
|
|
* - If a flow matching 'flow->key' exists in 'dpif':
|
|
|
|
*
|
|
|
|
* Stores 0 into 'flow->stats.error' and stores statistics for the flow
|
|
|
|
* into 'flow->stats'.
|
|
|
|
*
|
|
|
|
* If 'flow->n_actions' is zero, then 'flow->actions' is ignored. If
|
|
|
|
* 'flow->n_actions' is nonzero, then 'flow->actions' should point to
|
|
|
|
* an array of the specified number of actions. At most that many of
|
|
|
|
* the flow's actions will be copied into that array.
|
|
|
|
* 'flow->n_actions' will be updated to the number of actions actually
|
|
|
|
* present in the flow, which may be greater than the number stored if
|
|
|
|
* the flow has more actions than space available in the array.
|
|
|
|
*
|
|
|
|
* - Flow-specific errors are indicated by a positive errno value in
|
|
|
|
* 'flow->stats.error'. In particular, ENOENT indicates that no flow
|
|
|
|
* matching 'flow->key' exists in 'dpif'. When an error value is stored,
|
|
|
|
* the contents of 'flow->key' are preserved but other members of 'flow'
|
|
|
|
* should be treated as indeterminate.
|
|
|
|
*
|
|
|
|
* Returns 0 if all 'n' flows in 'flows' were updated (whether they were
|
|
|
|
* individually successful or not is indicated by 'flow->stats.error',
|
|
|
|
* however). Returns a positive errno value if an error that prevented
|
|
|
|
* this update occurred, in which the caller must not depend on any
|
|
|
|
* elements in 'flows' being updated or not updated.
|
|
|
|
*/
|
|
|
|
int (*flow_get)(const struct dpif *dpif, struct odp_flow flows[], int n);
|
|
|
|
|
|
|
|
/* Adds or modifies a flow in 'dpif' as specified in 'put':
|
|
|
|
*
|
|
|
|
* - If the flow specified in 'put->flow' does not exist in 'dpif', then
|
|
|
|
* behavior depends on whether ODPPF_CREATE is specified in 'put->flags':
|
|
|
|
* if it is, the flow will be added, otherwise the operation will fail
|
|
|
|
* with ENOENT.
|
|
|
|
*
|
|
|
|
* - Otherwise, the flow specified in 'put->flow' does exist in 'dpif'.
|
|
|
|
* Behavior in this case depends on whether ODPPF_MODIFY is specified in
|
|
|
|
* 'put->flags': if it is, the flow's actions will be updated, otherwise
|
|
|
|
* the operation will fail with EEXIST. If the flow's actions are
|
|
|
|
* updated, then its statistics will be zeroed if ODPPF_ZERO_STATS is set
|
|
|
|
* in 'put->flags', left as-is otherwise.
|
|
|
|
*/
|
|
|
|
int (*flow_put)(struct dpif *dpif, struct odp_flow_put *put);
|
|
|
|
|
|
|
|
/* Deletes a flow matching 'flow->key' from 'dpif' or returns ENOENT if
|
|
|
|
* 'dpif' does not contain such a flow.
|
|
|
|
*
|
|
|
|
* If successful, updates 'flow->stats', 'flow->n_actions', and
|
|
|
|
* 'flow->actions' as described in more detail under the flow_get member
|
|
|
|
* function below. */
|
|
|
|
int (*flow_del)(struct dpif *dpif, struct odp_flow *flow);
|
|
|
|
|
|
|
|
/* Deletes all flows from 'dpif' and clears all of its queues of received
|
|
|
|
* packets. */
|
|
|
|
int (*flow_flush)(struct dpif *dpif);
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
/* Attempts to begin dumping the flows in a dpif. On success, returns 0
|
|
|
|
* and initializes '*statep' with any data needed for iteration. On
|
|
|
|
* failure, returns a positive errno value. */
|
|
|
|
int (*flow_dump_start)(const struct dpif *dpif, void **statep);
|
|
|
|
|
|
|
|
/* Attempts to retrieve another flow from 'dpif' for 'state', which was
|
|
|
|
* initialized by a successful call to the 'flow_dump_start' function for
|
|
|
|
* 'dpif'. On success, stores a new odp_flow into 'flow' and returns 0.
|
|
|
|
* Returns EOF if the end of the flow table has been reached, or a positive
|
|
|
|
* errno value on error. This function will not be called again once it
|
|
|
|
* returns nonzero once for a given iteration (but the 'flow_dump_done'
|
|
|
|
* function will be called afterward).
|
|
|
|
*
|
|
|
|
* Dumping flow actions is optional. If the caller does not want to dump
|
|
|
|
* actions it will initialize 'flow->actions' to NULL and
|
|
|
|
* 'flow->actions_len' to 0. Otherwise, 'flow->actions' points to an array
|
|
|
|
* of struct nlattr and 'flow->actions_len' contains the number of bytes of
|
|
|
|
* Netlink attributes. The implemention should fill in as many actions as
|
|
|
|
* will fit into the provided array and update 'flow->actions_len' with the
|
|
|
|
* number of bytes required (regardless of whether they fit in the provided
|
|
|
|
* space). */
|
|
|
|
int (*flow_dump_next)(const struct dpif *dpif, void *state,
|
|
|
|
struct odp_flow *flow);
|
|
|
|
|
|
|
|
/* Releases resources from 'dpif' for 'state', which was initialized by a
|
|
|
|
* successful call to the 'flow_dump_start' function for 'dpif'. */
|
|
|
|
int (*flow_dump_done)(const struct dpif *dpif, void *state);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-12-10 10:40:58 -08:00
|
|
|
/* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
|
|
|
|
* frame specified in 'packet'. */
|
|
|
|
int (*execute)(struct dpif *dpif, const struct nlattr *actions,
|
|
|
|
size_t actions_len, const struct ofpbuf *packet);
|
2009-06-17 14:35:35 -07:00
|
|
|
|
|
|
|
/* Retrieves 'dpif''s "listen mask" into '*listen_mask'. Each ODPL_* bit
|
|
|
|
* set in '*listen_mask' indicates the 'dpif' will receive messages of the
|
|
|
|
* corresponding type when it calls the recv member function. */
|
|
|
|
int (*recv_get_mask)(const struct dpif *dpif, int *listen_mask);
|
|
|
|
|
|
|
|
/* Sets 'dpif''s "listen mask" to 'listen_mask'. Each ODPL_* bit set in
|
|
|
|
* 'listen_mask' indicates the 'dpif' will receive messages of the
|
|
|
|
* corresponding type when it calls the recv member function. */
|
|
|
|
int (*recv_set_mask)(struct dpif *dpif, int listen_mask);
|
|
|
|
|
2010-01-04 13:08:37 -08:00
|
|
|
/* Retrieves 'dpif''s sFlow sampling probability into '*probability'.
|
|
|
|
* Return value is 0 or a positive errno value. EOPNOTSUPP indicates that
|
|
|
|
* the datapath does not support sFlow, as does a null pointer.
|
|
|
|
*
|
2010-01-08 16:44:43 -08:00
|
|
|
* '*probability' is expressed as the number of packets out of UINT_MAX to
|
|
|
|
* sample, e.g. probability/UINT_MAX is the probability of sampling a given
|
|
|
|
* packet. */
|
2010-01-04 13:08:37 -08:00
|
|
|
int (*get_sflow_probability)(const struct dpif *dpif,
|
|
|
|
uint32_t *probability);
|
|
|
|
|
|
|
|
/* Sets 'dpif''s sFlow sampling probability to 'probability'. Return value
|
|
|
|
* is 0 or a positive errno value. EOPNOTSUPP indicates that the datapath
|
|
|
|
* does not support sFlow, as does a null pointer.
|
|
|
|
*
|
2010-01-08 16:44:43 -08:00
|
|
|
* 'probability' is expressed as the number of packets out of UINT_MAX to
|
|
|
|
* sample, e.g. probability/UINT_MAX is the probability of sampling a given
|
|
|
|
* packet. */
|
2010-01-04 13:08:37 -08:00
|
|
|
int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability);
|
|
|
|
|
2010-07-20 11:23:21 -07:00
|
|
|
/* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a
|
|
|
|
* priority value for use in the ODPAT_SET_PRIORITY action in
|
|
|
|
* '*priority'. */
|
|
|
|
int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id,
|
|
|
|
uint32_t *priority);
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
/* Attempts to receive a message from 'dpif'. If successful, stores the
|
|
|
|
* message into '*packetp'. The message, if one is received, must begin
|
2010-04-27 09:40:46 -07:00
|
|
|
* with 'struct odp_msg' as a header, and must have at least
|
|
|
|
* DPIF_RECV_MSG_PADDING bytes of headroom (allocated using
|
|
|
|
* e.g. ofpbuf_reserve()). Only messages of the types selected with the
|
|
|
|
* set_listen_mask member function should be received.
|
2009-06-17 14:35:35 -07:00
|
|
|
*
|
|
|
|
* This function must not block. If no message is ready to be received
|
|
|
|
* when it is called, it should return EAGAIN without blocking. */
|
|
|
|
int (*recv)(struct dpif *dpif, struct ofpbuf **packetp);
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up when 'dpif' has a message queued
|
|
|
|
* to be received with the recv member function. */
|
|
|
|
void (*recv_wait)(struct dpif *dpif);
|
|
|
|
};
|
|
|
|
|
|
|
|
/* dpif classes declared here and defined elsewhere.
 *
 * NOTE(review): going by the names, these are presumably the Linux datapath
 * and netdev-based implementations -- confirm against their definitions. */
extern const struct dpif_class dpif_linux_class;
extern const struct dpif_class dpif_netdev_class;
|
2009-06-17 14:35:35 -07:00
|
|
|
|
2010-01-22 15:14:01 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-06-17 14:35:35 -07:00
|
|
|
#endif /* dpif-provider.h */
|