2009-06-19 14:09:39 -07:00
|
|
|
|
/*
|
2011-01-12 09:22:12 -08:00
|
|
|
|
* Copyright (c) 2009, 2010, 2011 Nicira Networks.
|
2009-06-19 14:09:39 -07:00
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
#include "dpif.h"
|
|
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
|
#include <netinet/in.h>
|
2010-05-26 10:05:19 -07:00
|
|
|
|
#include <sys/socket.h>
|
2010-02-12 12:51:36 -08:00
|
|
|
|
#include <net/if.h>
|
2010-12-10 10:40:58 -08:00
|
|
|
|
#include <stdint.h>
|
2009-06-19 14:09:39 -07:00
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
|
|
#include "csum.h"
|
2010-11-29 12:21:08 -08:00
|
|
|
|
#include "dpif.h"
|
2009-06-19 14:09:39 -07:00
|
|
|
|
#include "dpif-provider.h"
|
2010-11-29 12:21:08 -08:00
|
|
|
|
#include "dummy.h"
|
2011-01-23 18:44:44 -08:00
|
|
|
|
#include "dynamic-string.h"
|
2009-06-19 14:09:39 -07:00
|
|
|
|
#include "flow.h"
|
|
|
|
|
#include "hmap.h"
|
|
|
|
|
#include "list.h"
|
|
|
|
|
#include "netdev.h"
|
2010-12-10 10:40:58 -08:00
|
|
|
|
#include "netlink.h"
|
2009-06-19 14:09:39 -07:00
|
|
|
|
#include "odp-util.h"
|
|
|
|
|
#include "ofp-print.h"
|
|
|
|
|
#include "ofpbuf.h"
|
|
|
|
|
#include "packets.h"
|
|
|
|
|
#include "poll-loop.h"
|
2011-10-11 11:07:14 -07:00
|
|
|
|
#include "random.h"
|
2010-11-24 12:35:22 -08:00
|
|
|
|
#include "shash.h"
|
2009-06-19 14:09:39 -07:00
|
|
|
|
#include "timeval.h"
|
|
|
|
|
#include "util.h"
|
|
|
|
|
#include "vlog.h"
|
2010-07-16 11:02:49 -07:00
|
|
|
|
|
2010-10-19 14:47:01 -07:00
|
|
|
|
VLOG_DEFINE_THIS_MODULE(dpif_netdev);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
/* Configuration parameters. */
|
|
|
|
|
enum { MAX_PORTS = 256 }; /* Maximum number of ports. */
|
|
|
|
|
enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
|
|
|
|
|
|
|
|
|
|
/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
|
|
|
|
|
* headers to be aligned on a 4-byte boundary. */
|
|
|
|
|
enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
|
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
/* Queues. */
|
|
|
|
|
enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
|
|
|
|
|
enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
|
|
|
|
|
enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
|
|
|
|
|
BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
|
|
|
|
|
|
|
|
|
|
struct dp_netdev_queue {
|
|
|
|
|
struct dpif_upcall *upcalls[MAX_QUEUE_LEN];
|
|
|
|
|
unsigned int head, tail;
|
|
|
|
|
};
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
/* Datapath based on the network device interface from netdev.h. */
|
|
|
|
|
struct dp_netdev {
|
2010-11-29 12:21:08 -08:00
|
|
|
|
const struct dpif_class *class;
|
2010-11-24 12:35:22 -08:00
|
|
|
|
char *name;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
int open_cnt;
|
2010-02-08 13:22:41 -05:00
|
|
|
|
bool destroyed;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
struct dp_netdev_queue queues[N_QUEUES];
|
2009-06-19 14:09:39 -07:00
|
|
|
|
struct hmap flow_table; /* Flow table. */
|
|
|
|
|
|
|
|
|
|
/* Statistics. */
|
|
|
|
|
long long int n_hit; /* Number of flow table matches. */
|
|
|
|
|
long long int n_missed; /* Number of flow table misses. */
|
|
|
|
|
long long int n_lost; /* Number of misses not passed to client. */
|
|
|
|
|
|
|
|
|
|
/* Ports. */
|
|
|
|
|
struct dp_netdev_port *ports[MAX_PORTS];
|
|
|
|
|
struct list port_list;
|
|
|
|
|
unsigned int serial;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* A port in a netdev-based datapath. */
|
|
|
|
|
struct dp_netdev_port {
|
|
|
|
|
int port_no; /* Index into dp_netdev's 'ports'. */
|
|
|
|
|
struct list node; /* Element in dp_netdev's 'port_list'. */
|
|
|
|
|
struct netdev *netdev;
|
2010-12-03 14:41:38 -08:00
|
|
|
|
bool internal; /* Internal port? */
|
2009-06-19 14:09:39 -07:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* A flow in dp_netdev's 'flow_table'. */
|
|
|
|
|
struct dp_netdev_flow {
|
|
|
|
|
struct hmap_node node; /* Element in dp_netdev's 'flow_table'. */
|
2010-10-11 13:31:35 -07:00
|
|
|
|
struct flow key;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
/* Statistics. */
|
2011-01-26 07:11:50 -08:00
|
|
|
|
long long int used; /* Last used time, in monotonic msecs. */
|
2010-08-24 16:00:46 -07:00
|
|
|
|
long long int packet_count; /* Number of packets matched. */
|
|
|
|
|
long long int byte_count; /* Number of bytes matched. */
|
2011-05-06 11:27:05 -07:00
|
|
|
|
ovs_be16 tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
/* Actions. */
|
2010-12-10 10:40:58 -08:00
|
|
|
|
struct nlattr *actions;
|
2010-12-11 22:51:31 -08:00
|
|
|
|
size_t actions_len;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Interface to netdev-based datapath. */
|
|
|
|
|
struct dpif_netdev {
|
|
|
|
|
struct dpif dpif;
|
|
|
|
|
struct dp_netdev *dp;
|
|
|
|
|
int listen_mask;
|
|
|
|
|
unsigned int dp_serial;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* All netdev-based datapaths. */
|
2010-11-24 12:35:22 -08:00
|
|
|
|
static struct shash dp_netdevs = SHASH_INITIALIZER(&dp_netdevs);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
/* Maximum port MTU seen so far. */
|
|
|
|
|
static int max_mtu = ETH_PAYLOAD_MAX;
|
|
|
|
|
|
|
|
|
|
static int get_port_by_number(struct dp_netdev *, uint16_t port_no,
|
|
|
|
|
struct dp_netdev_port **portp);
|
|
|
|
|
static int get_port_by_name(struct dp_netdev *, const char *devname,
|
|
|
|
|
struct dp_netdev_port **portp);
|
|
|
|
|
static void dp_netdev_free(struct dp_netdev *);
|
|
|
|
|
static void dp_netdev_flow_flush(struct dp_netdev *);
|
2010-12-03 14:41:38 -08:00
|
|
|
|
static int do_add_port(struct dp_netdev *, const char *devname,
|
|
|
|
|
const char *type, uint16_t port_no);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static int do_del_port(struct dp_netdev *, uint16_t port_no);
|
2010-11-29 12:21:08 -08:00
|
|
|
|
static int dpif_netdev_open(const struct dpif_class *, const char *name,
|
|
|
|
|
bool create, struct dpif **);
|
2011-07-28 09:05:25 -07:00
|
|
|
|
static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *,
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
int queue_no, const struct flow *,
|
|
|
|
|
uint64_t arg);
|
2011-10-21 14:38:54 -07:00
|
|
|
|
static void dp_netdev_execute_actions(struct dp_netdev *,
|
|
|
|
|
struct ofpbuf *, struct flow *,
|
|
|
|
|
const struct nlattr *actions,
|
|
|
|
|
size_t actions_len);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2010-11-29 12:21:08 -08:00
|
|
|
|
static struct dpif_class dpif_dummy_class;
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static struct dpif_netdev *
|
|
|
|
|
dpif_netdev_cast(const struct dpif *dpif)
|
|
|
|
|
{
|
2010-11-29 12:21:08 -08:00
|
|
|
|
assert(dpif->dpif_class->open == dpif_netdev_open);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct dp_netdev *
|
|
|
|
|
get_dp_netdev(const struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
return dpif_netdev_cast(dpif)->dp;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct dpif *
|
|
|
|
|
create_dpif_netdev(struct dp_netdev *dp)
|
|
|
|
|
{
|
2010-11-24 12:35:22 -08:00
|
|
|
|
uint16_t netflow_id = hash_string(dp->name, 0);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
struct dpif_netdev *dpif;
|
|
|
|
|
|
|
|
|
|
dp->open_cnt++;
|
|
|
|
|
|
|
|
|
|
dpif = xmalloc(sizeof *dpif);
|
2010-11-29 12:21:08 -08:00
|
|
|
|
dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif->dp = dp;
|
|
|
|
|
dpif->listen_mask = 0;
|
|
|
|
|
dpif->dp_serial = dp->serial;
|
|
|
|
|
|
|
|
|
|
return &dpif->dpif;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-11-29 12:21:08 -08:00
|
|
|
|
create_dp_netdev(const char *name, const struct dpif_class *class,
|
|
|
|
|
struct dp_netdev **dpp)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp;
|
|
|
|
|
int error;
|
|
|
|
|
int i;
|
|
|
|
|
|
2010-11-24 12:35:22 -08:00
|
|
|
|
dp = xzalloc(sizeof *dp);
|
2010-11-29 12:21:08 -08:00
|
|
|
|
dp->class = class;
|
2010-11-24 12:35:22 -08:00
|
|
|
|
dp->name = xstrdup(name);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp->open_cnt = 0;
|
|
|
|
|
for (i = 0; i < N_QUEUES; i++) {
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
dp->queues[i].head = dp->queues[i].tail = 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
hmap_init(&dp->flow_table);
|
|
|
|
|
list_init(&dp->port_list);
|
2011-08-18 10:35:40 -07:00
|
|
|
|
error = do_add_port(dp, name, "internal", OVSP_LOCAL);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
dp_netdev_free(dp);
|
2010-11-24 12:35:22 -08:00
|
|
|
|
return error;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
2010-11-24 12:35:22 -08:00
|
|
|
|
shash_add(&dp_netdevs, name, dp);
|
|
|
|
|
|
|
|
|
|
*dpp = dp;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-11-29 12:21:08 -08:00
|
|
|
|
dpif_netdev_open(const struct dpif_class *class, const char *name,
|
2010-11-18 10:06:41 -08:00
|
|
|
|
bool create, struct dpif **dpifp)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2010-11-24 12:35:22 -08:00
|
|
|
|
struct dp_netdev *dp;
|
|
|
|
|
|
|
|
|
|
dp = shash_find_data(&dp_netdevs, name);
|
|
|
|
|
if (!dp) {
|
|
|
|
|
if (!create) {
|
|
|
|
|
return ENODEV;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
} else {
|
2010-11-29 12:21:08 -08:00
|
|
|
|
int error = create_dp_netdev(name, class, &dp);
|
2010-11-24 12:35:22 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
2010-11-24 12:35:22 -08:00
|
|
|
|
assert(dp != NULL);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
} else {
|
2010-11-29 12:21:08 -08:00
|
|
|
|
if (dp->class != class) {
|
|
|
|
|
return EINVAL;
|
|
|
|
|
} else if (create) {
|
2010-11-24 12:35:22 -08:00
|
|
|
|
return EEXIST;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
2010-11-24 12:35:22 -08:00
|
|
|
|
|
|
|
|
|
*dpifp = create_dpif_netdev(dp);
|
|
|
|
|
return 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-01-04 17:00:36 -08:00
|
|
|
|
dp_netdev_purge_queues(struct dp_netdev *dp)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < N_QUEUES; i++) {
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
struct dp_netdev_queue *q = &dp->queues[i];
|
|
|
|
|
|
2011-01-04 17:00:36 -08:00
|
|
|
|
while (q->tail != q->head) {
|
|
|
|
|
struct dpif_upcall *upcall = q->upcalls[q->tail++ & QUEUE_MASK];
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
|
|
|
|
|
ofpbuf_delete(upcall->packet);
|
|
|
|
|
free(upcall);
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
2011-01-04 17:00:36 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dp_netdev_free(struct dp_netdev *dp)
|
|
|
|
|
{
|
2011-08-10 12:40:10 -07:00
|
|
|
|
struct dp_netdev_port *port, *next;
|
|
|
|
|
|
2011-01-04 17:00:36 -08:00
|
|
|
|
dp_netdev_flow_flush(dp);
|
2011-08-10 12:40:10 -07:00
|
|
|
|
LIST_FOR_EACH_SAFE (port, next, node, &dp->port_list) {
|
2011-01-04 17:00:36 -08:00
|
|
|
|
do_del_port(dp, port->port_no);
|
|
|
|
|
}
|
|
|
|
|
dp_netdev_purge_queues(dp);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
hmap_destroy(&dp->flow_table);
|
2010-11-24 12:35:22 -08:00
|
|
|
|
free(dp->name);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
free(dp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_netdev_close(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
assert(dp->open_cnt > 0);
|
2010-02-08 13:22:41 -05:00
|
|
|
|
if (--dp->open_cnt == 0 && dp->destroyed) {
|
2010-11-24 12:35:22 -08:00
|
|
|
|
shash_find_and_delete(&dp_netdevs, dp->name);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp_netdev_free(dp);
|
|
|
|
|
}
|
|
|
|
|
free(dpif);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-02-08 13:22:41 -05:00
|
|
|
|
dpif_netdev_destroy(struct dpif *dpif)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
2010-02-08 13:22:41 -05:00
|
|
|
|
dp->destroyed = true;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-10-05 11:18:13 -07:00
|
|
|
|
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
2011-08-04 08:04:10 +09:00
|
|
|
|
stats->n_flows = hmap_count(&dp->flow_table);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
stats->n_hit = dp->n_hit;
|
|
|
|
|
stats->n_missed = dp->n_missed;
|
|
|
|
|
stats->n_lost = dp->n_lost;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-12-03 14:41:38 -08:00
|
|
|
|
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
uint16_t port_no)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_port *port;
|
|
|
|
|
struct netdev *netdev;
|
2010-12-03 14:41:38 -08:00
|
|
|
|
bool internal;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
int mtu;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
/* XXX reject devices already in some dp_netdev. */
|
2011-08-18 11:17:29 -07:00
|
|
|
|
if (type[0] == '\0' || !strcmp(type, "system") || !strcmp(type, "dummy")) {
|
2010-12-03 14:41:38 -08:00
|
|
|
|
internal = false;
|
|
|
|
|
} else if (!strcmp(type, "internal")) {
|
|
|
|
|
internal = true;
|
|
|
|
|
} else {
|
|
|
|
|
VLOG_WARN("%s: unsupported port type %s", devname, type);
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
/* Open and validate network device. */
|
2010-11-29 12:21:08 -08:00
|
|
|
|
if (dp->class == &dpif_dummy_class) {
|
2011-08-05 14:18:06 -07:00
|
|
|
|
type = "dummy";
|
2010-11-29 12:21:08 -08:00
|
|
|
|
} else if (internal) {
|
2011-08-05 14:18:06 -07:00
|
|
|
|
type = "tap";
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
2010-01-12 16:01:43 -05:00
|
|
|
|
|
2011-08-05 14:18:06 -07:00
|
|
|
|
error = netdev_open(devname, type, &netdev);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
/* XXX reject loopback devices */
|
|
|
|
|
/* XXX reject non-Ethernet devices */
|
|
|
|
|
|
2011-08-05 14:15:32 -07:00
|
|
|
|
error = netdev_listen(netdev);
|
|
|
|
|
if (error) {
|
|
|
|
|
VLOG_ERR("%s: cannot receive packets on this network device (%s)",
|
|
|
|
|
devname, strerror(errno));
|
|
|
|
|
netdev_close(netdev);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
|
|
|
|
|
if (error) {
|
|
|
|
|
netdev_close(netdev);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
port = xmalloc(sizeof *port);
|
|
|
|
|
port->port_no = port_no;
|
|
|
|
|
port->netdev = netdev;
|
|
|
|
|
port->internal = internal;
|
|
|
|
|
|
2011-09-12 17:12:52 -07:00
|
|
|
|
error = netdev_get_mtu(netdev, &mtu);
|
|
|
|
|
if (!error) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
max_mtu = mtu;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
list_push_back(&dp->port_list, &port->node);
|
|
|
|
|
dp->ports[port_no] = port;
|
|
|
|
|
dp->serial++;
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-12-03 14:41:38 -08:00
|
|
|
|
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
uint16_t *port_nop)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
int port_no;
|
|
|
|
|
|
|
|
|
|
for (port_no = 0; port_no < MAX_PORTS; port_no++) {
|
|
|
|
|
if (!dp->ports[port_no]) {
|
|
|
|
|
*port_nop = port_no;
|
2010-12-03 14:41:38 -08:00
|
|
|
|
return do_add_port(dp, netdev_get_name(netdev),
|
|
|
|
|
netdev_get_type(netdev), port_no);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
2009-08-25 14:12:01 -07:00
|
|
|
|
return EFBIG;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_port_del(struct dpif *dpif, uint16_t port_no)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
2011-08-18 10:35:40 -07:00
|
|
|
|
return port_no == OVSP_LOCAL ? EINVAL : do_del_port(dp, port_no);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
is_valid_port_number(uint16_t port_no)
|
|
|
|
|
{
|
|
|
|
|
return port_no < MAX_PORTS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
get_port_by_number(struct dp_netdev *dp,
|
|
|
|
|
uint16_t port_no, struct dp_netdev_port **portp)
|
|
|
|
|
{
|
|
|
|
|
if (!is_valid_port_number(port_no)) {
|
|
|
|
|
*portp = NULL;
|
|
|
|
|
return EINVAL;
|
|
|
|
|
} else {
|
|
|
|
|
*portp = dp->ports[port_no];
|
|
|
|
|
return *portp ? 0 : ENOENT;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
get_port_by_name(struct dp_netdev *dp,
|
|
|
|
|
const char *devname, struct dp_netdev_port **portp)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_port *port;
|
|
|
|
|
|
2010-09-17 10:33:10 -07:00
|
|
|
|
LIST_FOR_EACH (port, node, &dp->port_list) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (!strcmp(netdev_get_name(port->netdev), devname)) {
|
|
|
|
|
*portp = port;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ENOENT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
do_del_port(struct dp_netdev *dp, uint16_t port_no)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_port *port;
|
2009-12-01 01:14:33 -08:00
|
|
|
|
char *name;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
error = get_port_by_number(dp, port_no, &port);
|
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
list_remove(&port->node);
|
|
|
|
|
dp->ports[port->port_no] = NULL;
|
|
|
|
|
dp->serial++;
|
|
|
|
|
|
2009-12-01 01:14:33 -08:00
|
|
|
|
name = xstrdup(netdev_get_name(port->netdev));
|
2009-06-19 14:09:39 -07:00
|
|
|
|
netdev_close(port->netdev);
|
2010-01-12 16:01:43 -05:00
|
|
|
|
|
2009-12-01 01:14:33 -08:00
|
|
|
|
free(name);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
free(port);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-01-23 18:48:02 -08:00
|
|
|
|
answer_port_query(const struct dp_netdev_port *port,
|
|
|
|
|
struct dpif_port *dpif_port)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_port->name = xstrdup(netdev_get_name(port->netdev));
|
2011-12-16 10:03:08 -08:00
|
|
|
|
dpif_port->type = xstrdup(port->internal ? "internal"
|
|
|
|
|
: netdev_get_type(port->netdev));
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_port->port_no = port->port_no;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Describes the port numbered 'port_no' in 'dpif'.
 *
 * Returns 0 and fills in 'dpif_port' via answer_port_query() if the port
 * exists.  Otherwise returns the error from get_port_by_number() and leaves
 * 'dpif_port' untouched. */
static int
dpif_netdev_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                 struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error = get_port_by_number(dp, port_no, &port);

    if (error) {
        return error;
    }
    answer_port_query(port, dpif_port);
    return 0;
}
|
|
|
|
|
|
|
|
|
|
/* Describes the port of 'dpif' whose netdev is named 'devname'.
 *
 * Returns 0 and fills in 'dpif_port' via answer_port_query() if such a port
 * exists.  Otherwise returns the error from get_port_by_name() and leaves
 * 'dpif_port' untouched. */
static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error = get_port_by_name(dp, devname, &port);

    if (error) {
        return error;
    }
    answer_port_query(port, dpif_port);
    return 0;
}
|
|
|
|
|
|
2011-01-26 09:24:59 -08:00
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_get_max_ports(const struct dpif *dpif OVS_UNUSED)
|
|
|
|
|
{
|
|
|
|
|
return MAX_PORTS;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_free_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
|
|
|
|
|
{
|
|
|
|
|
hmap_remove(&dp->flow_table, &flow->node);
|
|
|
|
|
free(flow->actions);
|
|
|
|
|
free(flow);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dp_netdev_flow_flush(struct dp_netdev *dp)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_flow *flow, *next;
|
|
|
|
|
|
2010-09-17 10:33:10 -07:00
|
|
|
|
HMAP_FOR_EACH_SAFE (flow, next, node, &dp->flow_table) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp_netdev_free_flow(dp, flow);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* dpif entry point that deletes all flows from 'dpif' by delegating to
 * dp_netdev_flow_flush().  Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
|
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
/* Iteration state for the dpif_netdev_port_dump_*() functions. */
struct dp_netdev_port_state {
    uint32_t port_no;           /* Slot at which the next call to
                                 * dpif_netdev_port_dump_next() resumes. */
    char *name;                 /* Copy of the most recently returned port
                                 * name; freed on the next successful
                                 * dump_next() call or by dump_done(). */
};
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
|
|
|
|
|
{
|
|
|
|
|
*statep = xzalloc(sizeof(struct dp_netdev_port_state));
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static int
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
|
2011-01-23 18:48:02 -08:00
|
|
|
|
struct dpif_port *dpif_port)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
struct dp_netdev_port_state *state = state_;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
uint32_t port_no;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
for (port_no = state->port_no; port_no < MAX_PORTS; port_no++) {
|
|
|
|
|
struct dp_netdev_port *port = dp->ports[port_no];
|
|
|
|
|
if (port) {
|
2011-01-23 18:48:02 -08:00
|
|
|
|
free(state->name);
|
|
|
|
|
state->name = xstrdup(netdev_get_name(port->netdev));
|
|
|
|
|
dpif_port->name = state->name;
|
2011-12-16 10:03:08 -08:00
|
|
|
|
dpif_port->type = (char *) (port->internal ? "internal"
|
|
|
|
|
: netdev_get_type(port->netdev));
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_port->port_no = port->port_no;
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
state->port_no = port_no + 1;
|
|
|
|
|
return 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
return EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-23 18:48:02 -08:00
|
|
|
|
dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
{
|
2011-01-23 18:48:02 -08:00
|
|
|
|
struct dp_netdev_port_state *state = state_;
|
|
|
|
|
free(state->name);
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
free(state);
|
|
|
|
|
return 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-02-11 10:59:47 -08:00
|
|
|
|
dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
|
|
|
|
|
if (dpif->dp_serial != dpif->dp->serial) {
|
|
|
|
|
dpif->dp_serial = dpif->dp->serial;
|
|
|
|
|
return ENOBUFS;
|
|
|
|
|
} else {
|
|
|
|
|
return EAGAIN;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_netdev_port_poll_wait(const struct dpif *dpif_)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
|
|
|
|
|
if (dpif->dp_serial != dpif->dp->serial) {
|
|
|
|
|
poll_immediate_wake();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct dp_netdev_flow *
|
2010-10-11 13:31:35 -07:00
|
|
|
|
dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *key)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_flow *flow;
|
|
|
|
|
|
2010-09-17 10:33:10 -07:00
|
|
|
|
HMAP_FOR_EACH_WITH_HASH (flow, node, flow_hash(key, 0), &dp->flow_table) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (flow_equal(&flow->key, key)) {
|
|
|
|
|
return flow;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-01-26 07:11:50 -08:00
|
|
|
|
get_dpif_flow_stats(struct dp_netdev_flow *flow, struct dpif_flow_stats *stats)
|
2011-01-26 07:03:39 -08:00
|
|
|
|
{
|
|
|
|
|
stats->n_packets = flow->packet_count;
|
|
|
|
|
stats->n_bytes = flow->byte_count;
|
2011-01-26 07:11:50 -08:00
|
|
|
|
stats->used = flow->used;
|
2011-01-26 07:03:39 -08:00
|
|
|
|
stats->tcp_flags = TCP_FLAGS(flow->tcp_ctl);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-01-23 18:44:44 -08:00
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
|
|
|
|
|
struct flow *flow)
|
|
|
|
|
{
|
|
|
|
|
if (odp_flow_key_to_flow(key, key_len, flow)) {
|
|
|
|
|
/* This should not happen: it indicates that odp_flow_key_from_flow()
|
|
|
|
|
* and odp_flow_key_to_flow() disagree on the acceptable form of a
|
|
|
|
|
* flow. Log the problem as an error, with enough details to enable
|
|
|
|
|
* debugging. */
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
|
|
|
|
|
if (!VLOG_DROP_ERR(&rl)) {
|
|
|
|
|
struct ds s;
|
|
|
|
|
|
|
|
|
|
ds_init(&s);
|
|
|
|
|
odp_flow_key_format(key, key_len, &s);
|
|
|
|
|
VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
|
|
|
|
|
ds_destroy(&s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
2011-09-08 16:30:20 -07:00
|
|
|
|
if (flow->in_port < OFPP_MAX
|
|
|
|
|
? flow->in_port >= MAX_PORTS
|
|
|
|
|
: flow->in_port != OFPP_LOCAL && flow->in_port != OFPP_NONE) {
|
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
2011-01-23 18:44:44 -08:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static int
|
2011-01-17 14:43:30 -08:00
|
|
|
|
dpif_netdev_flow_get(const struct dpif *dpif,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *nl_key, size_t nl_key_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
2011-01-17 14:40:58 -08:00
|
|
|
|
struct dp_netdev_flow *flow;
|
|
|
|
|
struct flow key;
|
|
|
|
|
int error;
|
2011-01-23 18:44:44 -08:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
|
2011-01-17 14:40:58 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
2010-10-11 13:31:35 -07:00
|
|
|
|
|
2011-01-17 14:40:58 -08:00
|
|
|
|
flow = dp_netdev_lookup_flow(dp, &key);
|
|
|
|
|
if (!flow) {
|
|
|
|
|
return ENOENT;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
2011-01-17 14:40:58 -08:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (stats) {
|
2011-01-26 07:11:50 -08:00
|
|
|
|
get_dpif_flow_stats(flow, stats);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
}
|
|
|
|
|
if (actionsp) {
|
|
|
|
|
*actionsp = ofpbuf_clone_data(flow->actions, flow->actions_len);
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-26 07:03:39 -08:00
|
|
|
|
set_flow_actions(struct dp_netdev_flow *flow,
|
|
|
|
|
const struct nlattr *actions, size_t actions_len)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-01-26 07:03:39 -08:00
|
|
|
|
flow->actions = xrealloc(flow->actions, actions_len);
|
|
|
|
|
flow->actions_len = actions_len;
|
|
|
|
|
memcpy(flow->actions, actions, actions_len);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-26 07:03:39 -08:00
|
|
|
|
add_flow(struct dpif *dpif, const struct flow *key,
|
|
|
|
|
const struct nlattr *actions, size_t actions_len)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_flow *flow;
|
|
|
|
|
int error;
|
|
|
|
|
|
2009-09-28 13:56:42 -07:00
|
|
|
|
flow = xzalloc(sizeof *flow);
|
2011-01-23 18:44:44 -08:00
|
|
|
|
flow->key = *key;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
error = set_flow_actions(flow, actions, actions_len);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
free(flow);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hmap_insert(&dp->flow_table, &flow->node, flow_hash(&flow->key, 0));
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
clear_stats(struct dp_netdev_flow *flow)
|
|
|
|
|
{
|
2011-01-26 07:11:50 -08:00
|
|
|
|
flow->used = 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
flow->packet_count = 0;
|
|
|
|
|
flow->byte_count = 0;
|
|
|
|
|
flow->tcp_ctl = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-26 07:12:24 -08:00
|
|
|
|
dpif_netdev_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr *nl_key, size_t nl_key_len,
|
|
|
|
|
const struct nlattr *actions, size_t actions_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct dpif_flow_stats *stats)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_flow *flow;
|
2010-10-11 13:31:35 -07:00
|
|
|
|
struct flow key;
|
2011-01-23 18:44:44 -08:00
|
|
|
|
int error;
|
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
|
2011-01-23 18:44:44 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2010-10-11 13:31:35 -07:00
|
|
|
|
flow = dp_netdev_lookup_flow(dp, &key);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (!flow) {
|
2011-01-26 07:12:24 -08:00
|
|
|
|
if (flags & DPIF_FP_CREATE) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (stats) {
|
|
|
|
|
memset(stats, 0, sizeof *stats);
|
|
|
|
|
}
|
|
|
|
|
return add_flow(dpif, &key, actions, actions_len);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
} else {
|
2009-08-25 14:12:01 -07:00
|
|
|
|
return EFBIG;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
return ENOENT;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2011-01-26 07:12:24 -08:00
|
|
|
|
if (flags & DPIF_FP_MODIFY) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
int error = set_flow_actions(flow, actions, actions_len);
|
|
|
|
|
if (!error) {
|
|
|
|
|
if (stats) {
|
2011-01-26 07:11:50 -08:00
|
|
|
|
get_dpif_flow_stats(flow, stats);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
}
|
2011-01-26 07:12:24 -08:00
|
|
|
|
if (flags & DPIF_FP_ZERO_STATS) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
clear_stats(flow);
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
} else {
|
|
|
|
|
return EEXIST;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-26 07:03:39 -08:00
|
|
|
|
dpif_netdev_flow_del(struct dpif *dpif,
|
|
|
|
|
const struct nlattr *nl_key, size_t nl_key_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct dpif_flow_stats *stats)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_flow *flow;
|
2010-10-11 13:31:35 -07:00
|
|
|
|
struct flow key;
|
2011-01-23 18:44:44 -08:00
|
|
|
|
int error;
|
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
|
2011-01-23 18:44:44 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2010-10-11 13:31:35 -07:00
|
|
|
|
flow = dp_netdev_lookup_flow(dp, &key);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (flow) {
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (stats) {
|
2011-01-26 07:11:50 -08:00
|
|
|
|
get_dpif_flow_stats(flow, stats);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp_netdev_free_flow(dp, flow);
|
|
|
|
|
return 0;
|
|
|
|
|
} else {
|
|
|
|
|
return ENOENT;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
struct dp_netdev_flow_state {
|
|
|
|
|
uint32_t bucket;
|
|
|
|
|
uint32_t offset;
|
2011-01-26 07:03:39 -08:00
|
|
|
|
struct nlattr *actions;
|
2011-03-02 13:25:10 -08:00
|
|
|
|
struct odputil_keybuf keybuf;
|
2011-01-26 07:11:50 -08:00
|
|
|
|
struct dpif_flow_stats stats;
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
};
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static int
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-01-26 07:03:39 -08:00
|
|
|
|
struct dp_netdev_flow_state *state;
|
|
|
|
|
|
|
|
|
|
*statep = state = xmalloc(sizeof *state);
|
|
|
|
|
state->bucket = 0;
|
|
|
|
|
state->offset = 0;
|
|
|
|
|
state->actions = NULL;
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
|
2011-01-26 07:03:39 -08:00
|
|
|
|
const struct nlattr **key, size_t *key_len,
|
|
|
|
|
const struct nlattr **actions, size_t *actions_len,
|
2011-01-26 07:11:50 -08:00
|
|
|
|
const struct dpif_flow_stats **stats)
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_flow_state *state = state_;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_flow *flow;
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
struct hmap_node *node;
|
2010-10-11 13:31:35 -07:00
|
|
|
|
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
|
|
|
|
|
if (!node) {
|
|
|
|
|
return EOF;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
|
|
|
|
|
flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
|
2011-01-23 18:44:44 -08:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
if (key) {
|
|
|
|
|
struct ofpbuf buf;
|
|
|
|
|
|
2011-03-02 13:25:10 -08:00
|
|
|
|
ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
odp_flow_key_from_flow(&buf, &flow->key);
|
2011-01-23 18:44:44 -08:00
|
|
|
|
|
2011-01-26 07:03:39 -08:00
|
|
|
|
*key = buf.data;
|
|
|
|
|
*key_len = buf.size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (actions) {
|
|
|
|
|
free(state->actions);
|
|
|
|
|
state->actions = xmemdup(flow->actions, flow->actions_len);
|
|
|
|
|
|
|
|
|
|
*actions = state->actions;
|
|
|
|
|
*actions_len = flow->actions_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (stats) {
|
2011-01-26 07:11:50 -08:00
|
|
|
|
get_dpif_flow_stats(flow, &state->stats);
|
2011-01-26 07:03:39 -08:00
|
|
|
|
*stats = &state->stats;
|
|
|
|
|
}
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-01-26 07:03:39 -08:00
|
|
|
|
dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
{
|
2011-01-26 07:03:39 -08:00
|
|
|
|
struct dp_netdev_flow_state *state = state_;
|
|
|
|
|
|
|
|
|
|
free(state->actions);
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
free(state);
|
|
|
|
|
return 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2010-10-08 16:36:13 -07:00
|
|
|
|
dpif_netdev_execute(struct dpif *dpif,
|
2011-06-01 13:39:51 -07:00
|
|
|
|
const struct nlattr *key_attrs, size_t key_len,
|
2010-12-11 22:51:31 -08:00
|
|
|
|
const struct nlattr *actions, size_t actions_len,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
const struct ofpbuf *packet)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct ofpbuf copy;
|
2010-09-03 11:30:02 -07:00
|
|
|
|
struct flow key;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
int error;
|
|
|
|
|
|
2009-08-25 14:12:01 -07:00
|
|
|
|
if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return EINVAL;
|
|
|
|
|
}
|
|
|
|
|
|
dpif-netdev: Simplify code by removing dpif_netdev_validate_actions().
dpif_netdev_validate_actions() existed for three reasons. First, it checked
that the actions were well-formed and valid. This isn't really necessary,
because the actions are built internally by ofproto-dpif and will always be
well-formed. (If not, that's a bug in ofproto-dpif.) Second, it checks
whether the actions will modify (mutate) the data in the packet and reports
that to the caller, which can use it to optimize what it does. However,
the only caller that used this was dpif_netdev_execute(), which is not a
fast-path (if dpif-netdev can be said to have a fast path at all).
Third, dpif_netdev_validate_actions() rejects certain actions that
dpif-netdev does not implement: OVS_ACTION_ATTR_SET_TUNNEL,
OVS_ACTION_ATTR_SET_PRIORITY, and OVS_ACTION_ATTR_POP_PRIORITY. However,
this doesn't really seem necessary to me. First, dpif-netdev can't support
tunnels in any case, so OVS_ACTION_ATTR_SET_TUNNEL shouldn't come up.
Second, the priority actions just aren't important enough to worry about;
they only affect QoS, which isn't really important with dpif-netdev since
it's going to be slow anyway.
So this commit just drops dpif_netdev_validate_actions() entirely.
2011-10-05 09:04:50 -07:00
|
|
|
|
/* Make a deep copy of 'packet', because we might modify its data. */
|
|
|
|
|
ofpbuf_init(©, DP_NETDEV_HEADROOM + packet->size);
|
|
|
|
|
ofpbuf_reserve(©, DP_NETDEV_HEADROOM);
|
|
|
|
|
ofpbuf_put(©, packet->data, packet->size);
|
2011-06-01 13:39:51 -07:00
|
|
|
|
|
2011-11-01 10:13:16 -07:00
|
|
|
|
flow_extract(©, 0, 0, -1, &key);
|
2011-09-08 16:30:20 -07:00
|
|
|
|
error = dpif_netdev_flow_from_nlattrs(key_attrs, key_len, &key);
|
|
|
|
|
if (!error) {
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_execute_actions(dp, ©, &key,
|
|
|
|
|
actions, actions_len);
|
2011-09-08 16:30:20 -07:00
|
|
|
|
}
|
dpif-netdev: Simplify code by removing dpif_netdev_validate_actions().
dpif_netdev_validate_actions() existed for three reasons. First, it checked
that the actions were well-formed and valid. This isn't really necessary,
because the actions are built internally by ofproto-dpif and will always be
well-formed. (If not, that's a bug in ofproto-dpif.) Second, it checks
whether the actions will modify (mutate) the data in the packet and reports
that to the caller, which can use it to optimize what it does. However,
the only caller that used this was dpif_netdev_execute(), which is not a
fast-path (if dpif-netdev can be said to have a fast path at all).
Third, dpif_netdev_validate_actions() rejects certain actions that
dpif-netdev does not implement: OVS_ACTION_ATTR_SET_TUNNEL,
OVS_ACTION_ATTR_SET_PRIORITY, and OVS_ACTION_ATTR_POP_PRIORITY. However,
this doesn't really seem necessary to me. First, dpif-netdev can't support
tunnels in any case, so OVS_ACTION_ATTR_SET_TUNNEL shouldn't come up.
Second, the priority actions just aren't important enough to worry about;
they only affect QoS, which isn't really important with dpif-netdev since
it's going to be slow anyway.
So this commit just drops dpif_netdev_validate_actions() entirely.
2011-10-05 09:04:50 -07:00
|
|
|
|
|
|
|
|
|
ofpbuf_uninit(©);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_recv_get_mask(const struct dpif *dpif, int *listen_mask)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
|
|
|
|
|
*listen_mask = dpif_netdev->listen_mask;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_recv_set_mask(struct dpif *dpif, int listen_mask)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
|
2011-01-26 07:14:04 -08:00
|
|
|
|
dpif_netdev->listen_mask = listen_mask;
|
|
|
|
|
return 0;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-11-21 13:36:17 -08:00
|
|
|
|
static int
|
|
|
|
|
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
|
|
|
|
|
uint32_t queue_id, uint32_t *priority)
|
|
|
|
|
{
|
|
|
|
|
*priority = queue_id;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
static struct dp_netdev_queue *
|
2009-06-19 14:09:39 -07:00
|
|
|
|
find_nonempty_queue(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
|
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
int mask = dpif_netdev->listen_mask;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < N_QUEUES; i++) {
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
struct dp_netdev_queue *q = &dp->queues[i];
|
|
|
|
|
if (q->head != q->tail && mask & (1u << i)) {
|
|
|
|
|
return q;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
return NULL;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
struct dp_netdev_queue *q = find_nonempty_queue(dpif);
|
|
|
|
|
if (q) {
|
|
|
|
|
struct dpif_upcall *u = q->upcalls[q->tail++ & QUEUE_MASK];
|
|
|
|
|
*upcall = *u;
|
|
|
|
|
free(u);
|
2010-12-06 10:03:31 -08:00
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
return 0;
|
|
|
|
|
} else {
|
|
|
|
|
return EAGAIN;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Arranges for the poll loop to wake up immediately if 'dpif' already has an
 * upcall queued that dpif_netdev_recv() could return. */
static void
dpif_netdev_recv_wait(struct dpif *dpif)
{
    if (!find_nonempty_queue(dpif)) {
        /* No messages ready to be received, and dp_wait() will ensure that we
         * wake up to queue new messages, so there is nothing to do. */
        return;
    }

    poll_immediate_wake();
}
|
2011-01-04 17:00:36 -08:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dpif_netdev_recv_purge(struct dpif *dpif)
|
|
|
|
|
{
|
|
|
|
|
struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
|
|
|
|
|
dp_netdev_purge_queues(dpif_netdev->dp);
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
static void
|
2010-10-11 13:31:35 -07:00
|
|
|
|
dp_netdev_flow_used(struct dp_netdev_flow *flow, struct flow *key,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
const struct ofpbuf *packet)
|
|
|
|
|
{
|
2011-01-26 07:11:50 -08:00
|
|
|
|
flow->used = time_msec();
|
2009-06-19 14:09:39 -07:00
|
|
|
|
flow->packet_count++;
|
|
|
|
|
flow->byte_count += packet->size;
|
2010-07-27 10:02:07 -07:00
|
|
|
|
if (key->dl_type == htons(ETH_TYPE_IP) && key->nw_proto == IPPROTO_TCP) {
|
|
|
|
|
struct tcp_header *th = packet->l4;
|
|
|
|
|
flow->tcp_ctl |= th->tcp_ctl;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port,
|
|
|
|
|
struct ofpbuf *packet)
|
|
|
|
|
{
|
|
|
|
|
struct dp_netdev_flow *flow;
|
2010-10-11 13:31:35 -07:00
|
|
|
|
struct flow key;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2010-08-10 11:38:55 -07:00
|
|
|
|
if (packet->size < ETH_HEADER_LEN) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
2011-11-01 10:13:16 -07:00
|
|
|
|
flow_extract(packet, 0, 0, port->port_no, &key);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
flow = dp_netdev_lookup_flow(dp, &key);
|
|
|
|
|
if (flow) {
|
|
|
|
|
dp_netdev_flow_used(flow, &key, packet);
|
|
|
|
|
dp_netdev_execute_actions(dp, packet, &key,
|
2010-12-10 10:40:58 -08:00
|
|
|
|
flow->actions, flow->actions_len);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp->n_hit++;
|
|
|
|
|
} else {
|
|
|
|
|
dp->n_missed++;
|
2011-07-28 09:05:25 -07:00
|
|
|
|
dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, 0);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-05-06 15:04:29 -07:00
|
|
|
|
dpif_netdev_run(struct dpif *dpif)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-05-06 15:04:29 -07:00
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_port *port;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
struct ofpbuf packet;
|
|
|
|
|
|
2011-01-24 09:41:29 -08:00
|
|
|
|
ofpbuf_init(&packet, DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + max_mtu);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2011-05-06 15:04:29 -07:00
|
|
|
|
LIST_FOR_EACH (port, node, &dp->port_list) {
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
/* Reset packet contents. */
|
|
|
|
|
ofpbuf_clear(&packet);
|
|
|
|
|
ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
|
|
|
|
|
|
|
|
|
|
error = netdev_recv(port->netdev, &packet);
|
|
|
|
|
if (!error) {
|
|
|
|
|
dp_netdev_port_input(dp, port, &packet);
|
|
|
|
|
} else if (error != EAGAIN && error != EOPNOTSUPP) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
|
|
|
|
|
netdev_get_name(port->netdev), strerror(error));
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ofpbuf_uninit(&packet);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-05-06 15:04:29 -07:00
|
|
|
|
dpif_netdev_wait(struct dpif *dpif)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-05-06 15:04:29 -07:00
|
|
|
|
struct dp_netdev *dp = get_dp_netdev(dpif);
|
|
|
|
|
struct dp_netdev_port *port;
|
2010-11-24 12:35:22 -08:00
|
|
|
|
|
2011-05-06 15:04:29 -07:00
|
|
|
|
LIST_FOR_EACH (port, node, &dp->port_list) {
|
|
|
|
|
netdev_recv_wait(port->netdev);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_set_dl(struct ofpbuf *packet, const struct ovs_key_ethernet *eth_key)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
|
|
|
|
struct eth_header *eh = packet->l2;
|
2011-10-21 14:38:54 -07:00
|
|
|
|
|
|
|
|
|
memcpy(eh->eth_src, eth_key->eth_src, sizeof eh->eth_src);
|
|
|
|
|
memcpy(eh->eth_dst, eth_key->eth_dst, sizeof eh->eth_dst);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_set_ip_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2011-10-21 14:38:54 -07:00
|
|
|
|
struct ip_header *nh = packet->l3;
|
|
|
|
|
|
|
|
|
|
if (nh->ip_proto == IPPROTO_TCP && packet->l7) {
|
|
|
|
|
struct tcp_header *th = packet->l4;
|
|
|
|
|
th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr);
|
|
|
|
|
} else if (nh->ip_proto == IPPROTO_UDP && packet->l7) {
|
|
|
|
|
struct udp_header *uh = packet->l4;
|
|
|
|
|
if (uh->udp_csum) {
|
|
|
|
|
uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr);
|
|
|
|
|
if (!uh->udp_csum) {
|
|
|
|
|
uh->udp_csum = htons(0xffff);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
nh->ip_csum = recalc_csum32(nh->ip_csum, *addr, new_addr);
|
|
|
|
|
*addr = new_addr;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_set_ip_tos(struct ip_header *nh, uint8_t new_tos)
|
2010-08-13 10:46:12 -07:00
|
|
|
|
{
|
2011-10-21 14:38:54 -07:00
|
|
|
|
uint8_t *field = &nh->ip_tos;
|
|
|
|
|
|
|
|
|
|
nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field),
|
2011-11-02 23:34:15 -07:00
|
|
|
|
htons((uint16_t) new_tos));
|
|
|
|
|
*field = new_tos;
|
2010-08-13 10:46:12 -07:00
|
|
|
|
}
|
|
|
|
|
|
2011-11-05 15:48:12 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_set_ip_ttl(struct ip_header *nh, uint8_t new_ttl)
|
|
|
|
|
{
|
|
|
|
|
uint8_t *field = &nh->ip_ttl;
|
|
|
|
|
|
|
|
|
|
nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
|
|
|
|
|
htons(new_ttl << 8));
|
|
|
|
|
*field = new_ttl;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static void
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_set_ipv4(struct ofpbuf *packet, const struct ovs_key_ipv4 *ipv4_key)
|
|
|
|
|
{
|
|
|
|
|
struct ip_header *nh = packet->l3;
|
|
|
|
|
|
|
|
|
|
if (nh->ip_src != ipv4_key->ipv4_src) {
|
|
|
|
|
dp_netdev_set_ip_addr(packet, &nh->ip_src, ipv4_key->ipv4_src);
|
|
|
|
|
}
|
|
|
|
|
if (nh->ip_dst != ipv4_key->ipv4_dst) {
|
|
|
|
|
dp_netdev_set_ip_addr(packet, &nh->ip_dst, ipv4_key->ipv4_dst);
|
|
|
|
|
}
|
|
|
|
|
if (nh->ip_tos != ipv4_key->ipv4_tos) {
|
|
|
|
|
dp_netdev_set_ip_tos(nh, ipv4_key->ipv4_tos);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
2011-11-05 15:48:12 -07:00
|
|
|
|
if (nh->ip_ttl != ipv4_key->ipv4_ttl) {
|
|
|
|
|
dp_netdev_set_ip_ttl(nh, ipv4_key->ipv4_ttl);
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
2009-11-11 14:59:49 -08:00
|
|
|
|
static void
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
|
2009-11-11 14:59:49 -08:00
|
|
|
|
{
|
2011-10-21 14:38:54 -07:00
|
|
|
|
*csum = recalc_csum16(*csum, *port, new_port);
|
|
|
|
|
*port = new_port;
|
|
|
|
|
}
|
2009-11-11 14:59:49 -08:00
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_set_tcp_port(struct ofpbuf *packet, const struct ovs_key_tcp *tcp_key)
|
|
|
|
|
{
|
|
|
|
|
struct tcp_header *th = packet->l4;
|
2009-11-11 14:59:49 -08:00
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
|
if (th->tcp_src != tcp_key->tcp_src) {
|
|
|
|
|
dp_netdev_set_port(&th->tcp_src, tcp_key->tcp_src, &th->tcp_csum);
|
|
|
|
|
}
|
|
|
|
|
if (th->tcp_dst != tcp_key->tcp_dst) {
|
|
|
|
|
dp_netdev_set_port(&th->tcp_dst, tcp_key->tcp_dst, &th->tcp_csum);
|
2009-11-11 14:59:49 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-19 14:09:39 -07:00
|
|
|
|
static void
|
2011-10-21 14:38:54 -07:00
|
|
|
|
dp_netdev_set_udp_port(struct ofpbuf *packet, const struct ovs_key_udp *udp_key)
|
|
|
|
|
{
|
|
|
|
|
struct udp_header *uh = packet->l4;
|
|
|
|
|
|
2011-12-21 11:19:28 -08:00
|
|
|
|
if (uh->udp_csum) {
|
|
|
|
|
if (uh->udp_src != udp_key->udp_src) {
|
|
|
|
|
dp_netdev_set_port(&uh->udp_src, udp_key->udp_src, &uh->udp_csum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (uh->udp_dst != udp_key->udp_dst) {
|
|
|
|
|
dp_netdev_set_port(&uh->udp_dst, udp_key->udp_dst, &uh->udp_csum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!uh->udp_csum) {
|
|
|
|
|
uh->udp_csum = htons(0xffff);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
uh->udp_src = udp_key->udp_src;
|
|
|
|
|
uh->udp_dst = udp_key->udp_dst;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
dp_netdev_output_port(struct dp_netdev *dp, struct ofpbuf *packet,
|
|
|
|
|
uint16_t out_port)
|
|
|
|
|
{
|
2010-08-24 16:00:46 -07:00
|
|
|
|
struct dp_netdev_port *p = dp->ports[out_port];
|
2009-06-19 14:09:39 -07:00
|
|
|
|
if (p) {
|
|
|
|
|
netdev_send(p->netdev, packet);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2011-07-28 09:05:25 -07:00
|
|
|
|
dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet,
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
int queue_no, const struct flow *flow, uint64_t arg)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
struct dp_netdev_queue *q = &dp->queues[queue_no];
|
|
|
|
|
struct dpif_upcall *upcall;
|
|
|
|
|
struct ofpbuf *buf;
|
|
|
|
|
size_t key_len;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
if (q->head - q->tail >= MAX_QUEUE_LEN) {
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp->n_lost++;
|
|
|
|
|
return ENOBUFS;
|
|
|
|
|
}
|
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
|
buf = ofpbuf_new(ODPUTIL_FLOW_KEY_BYTES + 2 + packet->size);
|
|
|
|
|
odp_flow_key_from_flow(buf, flow);
|
|
|
|
|
key_len = buf->size;
|
|
|
|
|
ofpbuf_pull(buf, key_len);
|
|
|
|
|
ofpbuf_reserve(buf, 2);
|
|
|
|
|
ofpbuf_put(buf, packet->data, packet->size);
|
|
|
|
|
|
|
|
|
|
upcall = xzalloc(sizeof *upcall);
|
|
|
|
|
upcall->type = queue_no;
|
|
|
|
|
upcall->packet = buf;
|
|
|
|
|
upcall->key = buf->base;
|
|
|
|
|
upcall->key_len = key_len;
|
|
|
|
|
upcall->userdata = arg;
|
|
|
|
|
|
2011-02-15 10:07:20 -08:00
|
|
|
|
q->upcalls[q->head++ & QUEUE_MASK] = upcall;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2011-10-11 11:07:14 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_sample(struct dp_netdev *dp,
|
|
|
|
|
struct ofpbuf *packet, struct flow *key,
|
|
|
|
|
const struct nlattr *action)
|
|
|
|
|
{
|
|
|
|
|
const struct nlattr *subactions = NULL;
|
|
|
|
|
const struct nlattr *a;
|
|
|
|
|
size_t left;
|
|
|
|
|
|
|
|
|
|
NL_NESTED_FOR_EACH_UNSAFE (a, left, action) {
|
|
|
|
|
int type = nl_attr_type(a);
|
|
|
|
|
|
|
|
|
|
switch ((enum ovs_sample_attr) type) {
|
|
|
|
|
case OVS_SAMPLE_ATTR_PROBABILITY:
|
|
|
|
|
if (random_uint32() >= nl_attr_get_u32(a)) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_SAMPLE_ATTR_ACTIONS:
|
|
|
|
|
subactions = a;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_SAMPLE_ATTR_UNSPEC:
|
|
|
|
|
case __OVS_SAMPLE_ATTR_MAX:
|
|
|
|
|
default:
|
|
|
|
|
NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dp_netdev_execute_actions(dp, packet, key, nl_attr_get(subactions),
|
|
|
|
|
nl_attr_get_size(subactions));
|
|
|
|
|
}
|
|
|
|
|
|
2011-10-12 16:24:54 -07:00
|
|
|
|
static void
|
|
|
|
|
dp_netdev_action_userspace(struct dp_netdev *dp,
|
|
|
|
|
struct ofpbuf *packet, struct flow *key,
|
|
|
|
|
const struct nlattr *a)
|
|
|
|
|
{
|
|
|
|
|
const struct nlattr *userdata_attr;
|
|
|
|
|
uint64_t userdata;
|
|
|
|
|
|
|
|
|
|
userdata_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
|
|
|
|
|
userdata = userdata_attr ? nl_attr_get_u64(userdata_attr) : 0;
|
|
|
|
|
dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata);
|
|
|
|
|
}
|
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
|
static void
|
|
|
|
|
execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
|
|
|
|
|
{
|
|
|
|
|
enum ovs_key_attr type = nl_attr_type(a);
|
|
|
|
|
switch (type) {
|
|
|
|
|
case OVS_KEY_ATTR_TUN_ID:
|
2011-11-01 10:13:16 -07:00
|
|
|
|
case OVS_KEY_ATTR_PRIORITY:
|
|
|
|
|
/* not implemented */
|
2011-10-21 14:38:54 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_KEY_ATTR_ETHERNET:
|
|
|
|
|
dp_netdev_set_dl(packet,
|
|
|
|
|
nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet)));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_KEY_ATTR_IPV4:
|
|
|
|
|
dp_netdev_set_ipv4(packet,
|
|
|
|
|
nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4)));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_KEY_ATTR_TCP:
|
|
|
|
|
dp_netdev_set_tcp_port(packet,
|
|
|
|
|
nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp)));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_KEY_ATTR_UDP:
|
|
|
|
|
dp_netdev_set_udp_port(packet,
|
|
|
|
|
nl_attr_get_unspec(a, sizeof(struct ovs_key_udp)));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_KEY_ATTR_UNSPEC:
|
2011-11-14 15:56:43 -08:00
|
|
|
|
case OVS_KEY_ATTR_ENCAP:
|
2011-10-21 14:38:54 -07:00
|
|
|
|
case OVS_KEY_ATTR_ETHERTYPE:
|
|
|
|
|
case OVS_KEY_ATTR_IPV6:
|
|
|
|
|
case OVS_KEY_ATTR_IN_PORT:
|
2011-11-14 15:56:43 -08:00
|
|
|
|
case OVS_KEY_ATTR_VLAN:
|
2011-10-21 14:38:54 -07:00
|
|
|
|
case OVS_KEY_ATTR_ICMP:
|
|
|
|
|
case OVS_KEY_ATTR_ICMPV6:
|
|
|
|
|
case OVS_KEY_ATTR_ARP:
|
|
|
|
|
case OVS_KEY_ATTR_ND:
|
|
|
|
|
case __OVS_KEY_ATTR_MAX:
|
|
|
|
|
default:
|
|
|
|
|
NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dp_netdev_execute_actions(struct dp_netdev *dp,
|
2010-10-11 13:31:35 -07:00
|
|
|
|
struct ofpbuf *packet, struct flow *key,
|
2010-12-10 10:40:58 -08:00
|
|
|
|
const struct nlattr *actions,
|
2010-12-11 22:51:31 -08:00
|
|
|
|
size_t actions_len)
|
2009-06-19 14:09:39 -07:00
|
|
|
|
{
|
2010-12-10 10:40:58 -08:00
|
|
|
|
const struct nlattr *a;
|
|
|
|
|
unsigned int left;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2010-12-10 10:40:58 -08:00
|
|
|
|
NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
|
2011-11-14 15:56:43 -08:00
|
|
|
|
const struct ovs_action_push_vlan *vlan;
|
2011-10-11 11:07:14 -07:00
|
|
|
|
int type = nl_attr_type(a);
|
|
|
|
|
|
2011-10-05 09:59:51 -07:00
|
|
|
|
switch ((enum ovs_action_attr) type) {
|
2011-08-18 10:35:40 -07:00
|
|
|
|
case OVS_ACTION_ATTR_OUTPUT:
|
2010-12-10 10:40:58 -08:00
|
|
|
|
dp_netdev_output_port(dp, packet, nl_attr_get_u32(a));
|
2010-08-24 16:00:46 -07:00
|
|
|
|
break;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
|
case OVS_ACTION_ATTR_USERSPACE:
|
2011-10-12 16:24:54 -07:00
|
|
|
|
dp_netdev_action_userspace(dp, packet, key, a);
|
2010-08-24 16:00:46 -07:00
|
|
|
|
break;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2011-11-14 15:56:43 -08:00
|
|
|
|
case OVS_ACTION_ATTR_PUSH_VLAN:
|
|
|
|
|
vlan = nl_attr_get(a);
|
2012-01-03 10:42:56 -08:00
|
|
|
|
eth_push_vlan(packet, vlan->vlan_tci);
|
2009-06-19 14:09:39 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2011-11-14 15:56:43 -08:00
|
|
|
|
case OVS_ACTION_ATTR_POP_VLAN:
|
2011-11-14 14:02:43 -08:00
|
|
|
|
eth_pop_vlan(packet);
|
2010-08-24 16:00:46 -07:00
|
|
|
|
break;
|
2009-06-19 14:09:39 -07:00
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
|
case OVS_ACTION_ATTR_SET:
|
|
|
|
|
execute_set_action(packet, nl_attr_get(a));
|
2010-08-24 16:00:46 -07:00
|
|
|
|
break;
|
2011-10-11 11:07:14 -07:00
|
|
|
|
|
|
|
|
|
case OVS_ACTION_ATTR_SAMPLE:
|
|
|
|
|
dp_netdev_sample(dp, packet, key, a);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVS_ACTION_ATTR_UNSPEC:
|
|
|
|
|
case __OVS_ACTION_ATTR_MAX:
|
|
|
|
|
NOT_REACHED();
|
2010-08-24 16:00:46 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
2009-06-19 14:09:39 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const struct dpif_class dpif_netdev_class = {
|
|
|
|
|
"netdev",
|
2009-07-06 11:06:36 -07:00
|
|
|
|
NULL, /* enumerate */
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_open,
|
|
|
|
|
dpif_netdev_close,
|
2010-02-08 13:22:41 -05:00
|
|
|
|
dpif_netdev_destroy,
|
2011-05-06 15:04:29 -07:00
|
|
|
|
dpif_netdev_run,
|
|
|
|
|
dpif_netdev_wait,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_get_stats,
|
|
|
|
|
dpif_netdev_port_add,
|
|
|
|
|
dpif_netdev_port_del,
|
|
|
|
|
dpif_netdev_port_query_by_number,
|
|
|
|
|
dpif_netdev_port_query_by_name,
|
2011-01-26 09:24:59 -08:00
|
|
|
|
dpif_netdev_get_max_ports,
|
2011-10-12 16:24:54 -07:00
|
|
|
|
NULL, /* port_get_pid */
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
|
dpif_netdev_port_dump_start,
|
|
|
|
|
dpif_netdev_port_dump_next,
|
|
|
|
|
dpif_netdev_port_dump_done,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_port_poll,
|
|
|
|
|
dpif_netdev_port_poll_wait,
|
|
|
|
|
dpif_netdev_flow_get,
|
|
|
|
|
dpif_netdev_flow_put,
|
|
|
|
|
dpif_netdev_flow_del,
|
|
|
|
|
dpif_netdev_flow_flush,
|
datapath: Change listing flows to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length. This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call. It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.
As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow. Now dumping a flow and its actions is
a single step, closing that window.
Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process. It doesn't
look like this should be a problem for ovs-vswitchd.
It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2010-12-28 10:39:52 -08:00
|
|
|
|
dpif_netdev_flow_dump_start,
|
|
|
|
|
dpif_netdev_flow_dump_next,
|
|
|
|
|
dpif_netdev_flow_dump_done,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_execute,
|
2011-09-27 15:08:50 -07:00
|
|
|
|
NULL, /* operate */
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_recv_get_mask,
|
|
|
|
|
dpif_netdev_recv_set_mask,
|
2011-11-21 13:36:17 -08:00
|
|
|
|
dpif_netdev_queue_to_priority,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
dpif_netdev_recv,
|
|
|
|
|
dpif_netdev_recv_wait,
|
2011-01-04 17:00:36 -08:00
|
|
|
|
dpif_netdev_recv_purge,
|
2009-06-19 14:09:39 -07:00
|
|
|
|
};
|
2010-11-29 12:21:08 -08:00
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
dpif_dummy_register(void)
|
|
|
|
|
{
|
|
|
|
|
if (!dpif_dummy_class.type) {
|
|
|
|
|
dpif_dummy_class = dpif_netdev_class;
|
|
|
|
|
dpif_dummy_class.type = "dummy";
|
|
|
|
|
dp_register_provider(&dpif_dummy_class);
|
|
|
|
|
}
|
|
|
|
|
}
|