2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-25 15:07:05 +00:00
Files
openvswitch/lib/dpif.h

272 lines
8.9 KiB
C
Raw Normal View History

/*
* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DPIF_H
#define DPIF_H 1
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <linux/openvswitch.h>
#include "openflow/openflow.h"
#include "netdev.h"
#include "util.h"
#ifdef __cplusplus
extern "C" {
#endif
struct dpif;
struct ds;
struct flow;
struct nlattr;
struct ofpbuf;
struct sset;
struct dpif_class;
int dp_register_provider(const struct dpif_class *);
int dp_unregister_provider(const char *type);
void dp_blacklist_provider(const char *type);
void dp_enumerate_types(struct sset *types);
const char *dpif_normalize_type(const char *);
int dp_enumerate_names(const char *type, struct sset *names);
void dp_parse_name(const char *datapath_name, char **name, char **type);
int dpif_open(const char *name, const char *type, struct dpif **);
int dpif_create(const char *name, const char *type, struct dpif **);
int dpif_create_and_open(const char *name, const char *type, struct dpif **);
void dpif_close(struct dpif *);
void dpif_run(struct dpif *);
void dpif_wait(struct dpif *);
const char *dpif_name(const struct dpif *);
const char *dpif_base_name(const struct dpif *);
int dpif_delete(struct dpif *);
/* Statistics for a dpif as a whole. */
struct dpif_dp_stats {
uint64_t n_hit; /* Number of flow table matches. */
uint64_t n_missed; /* Number of flow table misses. */
uint64_t n_lost; /* Number of misses not sent to userspace. */
uint64_t n_flows; /* Number of flows present. */
};
int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
/* Port operations. */
int dpif_port_add(struct dpif *, struct netdev *, uint32_t *port_nop);
int dpif_port_del(struct dpif *, uint32_t port_no);
/* A port within a datapath.
*
* 'name' and 'type' are suitable for passing to netdev_open(). */
struct dpif_port {
char *name; /* Network device name, e.g. "eth0". */
char *type; /* Network device type, e.g. "system". */
uint32_t port_no; /* Port number within datapath. */
};
void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
void dpif_port_destroy(struct dpif_port *);
bool dpif_port_exists(const struct dpif *dpif, const char *devname);
int dpif_port_query_by_number(const struct dpif *, uint32_t port_no,
struct dpif_port *);
int dpif_port_query_by_name(const struct dpif *, const char *devname,
struct dpif_port *);
int dpif_port_get_name(struct dpif *, uint32_t port_no,
char *name, size_t name_size);
int dpif_get_max_ports(const struct dpif *);
uint32_t dpif_port_get_pid(const struct dpif *, uint32_t port_no);
2011-01-10 13:12:12 -08:00
struct dpif_port_dump {
const struct dpif *dpif;
int error;
void *state;
};
void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *);
bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *);
2011-01-10 13:12:12 -08:00
int dpif_port_dump_done(struct dpif_port_dump *);
/* Iterates through each DPIF_PORT in DPIF, using DUMP as state.
2011-01-10 13:12:12 -08:00
*
* Arguments all have pointer type.
*
* If you break out of the loop, then you need to free the dump structure by
* hand using dpif_port_dump_done(). */
#define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF) \
2011-01-10 13:12:12 -08:00
for (dpif_port_dump_start(DUMP, DPIF); \
(dpif_port_dump_next(DUMP, DPIF_PORT) \
2011-01-10 13:12:12 -08:00
? true \
: (dpif_port_dump_done(DUMP), false)); \
)
int dpif_port_poll(const struct dpif *, char **devnamep);
void dpif_port_poll_wait(const struct dpif *);
/* Flow table operations. */
struct dpif_flow_stats {
uint64_t n_packets;
uint64_t n_bytes;
long long int used;
uint8_t tcp_flags;
};
void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet,
long long int used, struct dpif_flow_stats *);
void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *);
enum dpif_flow_put_flags {
DPIF_FP_CREATE = 1 << 0, /* Allow creating a new flow. */
DPIF_FP_MODIFY = 1 << 1, /* Allow modifying an existing flow. */
DPIF_FP_ZERO_STATS = 1 << 2 /* Zero the stats of an existing flow. */
};
int dpif_flow_flush(struct dpif *);
int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
struct dpif_flow_stats *);
int dpif_flow_del(struct dpif *,
const struct nlattr *key, size_t key_len,
struct dpif_flow_stats *);
int dpif_flow_get(const struct dpif *,
const struct nlattr *key, size_t key_len,
struct ofpbuf **actionsp, struct dpif_flow_stats *);
datapath: Change listing flows to use an iterator concept. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. In turn, that means that flow keys must become variable-length. This does not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form, because that would require userspace to know how much space to allocate for each flow's key in advance, or to allocate as much space as could possibly be needed. Neither choice is very attractive. This commit prepares for a different solution, by replacing ODP_FLOW_LIST by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath on each call. It is much cleaner to allocate the maximum amount of space for a single flow key than to do so for possibly a very large number of flow keys. As a side effect, this patch also fixes a race condition that sometimes made "ovs-dpctl dump-flows" print an error: previously, flows were listed and then their actions were retrieved, which left a window in which ovs-vswitchd could delete the flow. Now dumping a flow and its actions is a single step, closing that window. Dumping all of the flows in a datapath is no longer an atomic step, so now it is possible to miss some flows or see a single flow twice during iteration, if the flow table is modified by another process. It doesn't look like this should be a problem for ovs-vswitchd. It would be faster to retrieve a number of flows in batch instead of just one at a time, but that will naturally happen later when the kernel datapath interface is changed to use Netlink, so this patch does not bother with it. Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
struct dpif_flow_dump {
const struct dpif *dpif;
int error;
void *state;
};
void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *);
bool dpif_flow_dump_next(struct dpif_flow_dump *,
const struct nlattr **key, size_t *key_len,
const struct nlattr **actions, size_t *actions_len,
const struct dpif_flow_stats **);
datapath: Change listing flows to use an iterator concept. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. In turn, that means that flow keys must become variable-length. This does not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form, because that would require userspace to know how much space to allocate for each flow's key in advance, or to allocate as much space as could possibly be needed. Neither choice is very attractive. This commit prepares for a different solution, by replacing ODP_FLOW_LIST by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath on each call. It is much cleaner to allocate the maximum amount of space for a single flow key than to do so for possibly a very large number of flow keys. As a side effect, this patch also fixes a race condition that sometimes made "ovs-dpctl dump-flows" print an error: previously, flows were listed and then their actions were retrieved, which left a window in which ovs-vswitchd could delete the flow. Now dumping a flow and its actions is a single step, closing that window. Dumping all of the flows in a datapath is no longer an atomic step, so now it is possible to miss some flows or see a single flow twice during iteration, if the flow table is modified by another process. It doesn't look like this should be a problem for ovs-vswitchd. It would be faster to retrieve a number of flows in batch instead of just one at a time, but that will naturally happen later when the kernel datapath interface is changed to use Netlink, so this patch does not bother with it. Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
int dpif_flow_dump_done(struct dpif_flow_dump *);
/* Packet operations. */
int dpif_execute(struct dpif *,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *);
/* Operation batching interface.
*
* Some datapaths are faster at performing N operations together than the same
* N operations individually, hence an interface for batching.
*/
enum dpif_op_type {
DPIF_OP_FLOW_PUT = 1,
DPIF_OP_FLOW_DEL,
DPIF_OP_EXECUTE,
};
struct dpif_flow_put {
/* Input. */
enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
const struct nlattr *key; /* Flow to put. */
size_t key_len; /* Length of 'key' in bytes. */
const struct nlattr *actions; /* Actions to perform on flow. */
size_t actions_len; /* Length of 'actions' in bytes. */
/* Output. */
struct dpif_flow_stats *stats; /* Optional flow statistics. */
};
struct dpif_flow_del {
/* Input. */
const struct nlattr *key; /* Flow to delete. */
size_t key_len; /* Length of 'key' in bytes. */
/* Output. */
struct dpif_flow_stats *stats; /* Optional flow statistics. */
};
struct dpif_execute {
const struct nlattr *key; /* Partial flow key (only for metadata). */
size_t key_len; /* Length of 'key' in bytes. */
const struct nlattr *actions; /* Actions to execute on packet. */
size_t actions_len; /* Length of 'actions' in bytes. */
const struct ofpbuf *packet; /* Packet to execute. */
};
struct dpif_op {
enum dpif_op_type type;
int error;
union {
struct dpif_flow_put flow_put;
struct dpif_flow_del flow_del;
struct dpif_execute execute;
} u;
};
void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops);
/* Upcalls. */
enum dpif_upcall_type {
DPIF_UC_MISS, /* Miss in flow table. */
DPIF_UC_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
DPIF_N_UC_TYPES
};
const char *dpif_upcall_type_to_string(enum dpif_upcall_type);
datapath: Report kernel's flow key when passing packets up to userspace. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
/* A packet passed up from the datapath to userspace.
*
* If 'key' or 'actions' is nonnull, then it points into data owned by
* 'packet', so their memory cannot be freed separately. (This is hardly a
* great way to do things but it works out OK for the dpif providers and
* clients that exist so far.)
*/
struct dpif_upcall {
/* All types. */
enum dpif_upcall_type type;
datapath: Report kernel's flow key when passing packets up to userspace. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
struct ofpbuf *packet; /* Packet data. */
struct nlattr *key; /* Flow key. */
size_t key_len; /* Length of 'key' in bytes. */
/* DPIF_UC_ACTION only. */
uint64_t userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
datapath: Report kernel's flow key when passing packets up to userspace. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
};
int dpif_recv_set(struct dpif *, bool enable);
int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
void dpif_recv_wait(struct dpif *);
/* Miscellaneous. */
void dpif_get_netflow_ids(const struct dpif *,
uint8_t *engine_type, uint8_t *engine_id);
int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id,
uint32_t *priority);
#ifdef __cplusplus
}
#endif
#endif /* dpif.h */