2009-06-15 15:11:30 -07:00
|
|
|
/*
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
* Copyright (c) 2009, 2010, 2011 Nicira Networks.
|
2009-06-15 15:11:30 -07:00
|
|
|
* Distributed under the terms of the GNU GPL version 2.
|
|
|
|
*
|
|
|
|
* Significant portions of this file may be copied from parts of the Linux
|
|
|
|
* kernel, by Linus Torvalds and others.
|
|
|
|
*/
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
/* Interface exported by openvswitch_mod. */
|
|
|
|
|
|
|
|
#ifndef DATAPATH_H
|
|
|
|
#define DATAPATH_H 1
|
|
|
|
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/netdevice.h>
|
2010-07-28 18:20:43 -07:00
|
|
|
#include <linux/seqlock.h>
|
2009-07-08 13:19:16 -07:00
|
|
|
#include <linux/skbuff.h>
|
2009-06-12 16:45:01 -07:00
|
|
|
#include <linux/version.h>
|
2010-11-22 14:17:24 -08:00
|
|
|
|
|
|
|
#include "checksum.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
#include "flow.h"
|
2009-08-05 12:56:23 -07:00
|
|
|
#include "dp_sysfs.h"
|
2010-12-30 20:48:38 -08:00
|
|
|
#include "vlan.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2010-04-12 15:53:39 -04:00
|
|
|
/* Forward declaration: this header only refers to struct vport by pointer. */
struct vport;
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
/* Mask for the priority bits in a vlan header.  If we ever merge upstream
 * then this should go into include/linux/if_vlan.h. */
#define VLAN_PCP_MASK 0xe000
/* Bit position of the lowest bit selected by VLAN_PCP_MASK. */
#define VLAN_PCP_SHIFT 13
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2009-09-11 14:32:50 -07:00
|
|
|
/* Number of entries in the ports[] array of struct datapath. */
#define DP_MAX_PORTS 1024
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2010-01-08 16:45:14 -08:00
|
|
|
/**
|
|
|
|
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
|
|
|
|
* datapath.
|
|
|
|
* @n_frags: Number of IP fragments processed by datapath.
|
|
|
|
* @n_hit: Number of received packets for which a matching flow was found in
|
|
|
|
* the flow table.
|
|
|
|
* @n_miss: Number of received packets that had no matching flow in the flow
|
|
|
|
* table. The sum of @n_hit and @n_miss is the number of packets that have
|
|
|
|
* been received by the datapath.
|
|
|
|
* @n_lost: Number of received packets that had no matching flow in the flow
|
|
|
|
* table that could not be sent to userspace (normally due to an overflow in
|
|
|
|
* one of the datapath's queues).
|
|
|
|
*/
|
2009-07-08 13:19:16 -07:00
|
|
|
struct dp_stats_percpu {
|
|
|
|
u64 n_frags;
|
|
|
|
u64 n_hit;
|
|
|
|
u64 n_missed;
|
|
|
|
u64 n_lost;
|
2010-07-28 18:20:43 -07:00
|
|
|
seqcount_t seqlock;
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
|
|
|
|
2010-01-04 13:08:37 -08:00
|
|
|
/**
|
|
|
|
* struct datapath - datapath for flow-based packet switching
|
2011-01-05 12:39:57 -08:00
|
|
|
* @rcu: RCU callback head for deferred destruction.
|
2011-01-21 17:01:56 -08:00
|
|
|
* @dp_ifindex: ifindex of local port.
|
|
|
|
* @list_node: Element in global 'dps' list.
|
2011-01-26 12:49:06 -08:00
|
|
|
* @ifobj: Represents /sys/class/net/<devname>/brif. Protected by RTNL.
|
2010-01-04 13:08:37 -08:00
|
|
|
* @drop_frags: Drop all IP fragments if nonzero.
|
|
|
|
* @n_flows: Number of flows currently in flow table.
|
2011-01-26 12:49:06 -08:00
|
|
|
* @table: Current flow table. Protected by genl_lock and RCU.
|
2010-12-03 13:09:26 -08:00
|
|
|
* @ports: Map from port number to &struct vport. %ODPP_LOCAL port
|
2011-01-26 12:49:06 -08:00
|
|
|
* always exists, other ports may be %NULL. Protected by RTNL and RCU.
|
|
|
|
* @port_list: List of all ports in @ports in arbitrary order. RTNL required
|
|
|
|
* to iterate or modify.
|
2010-01-04 13:08:37 -08:00
|
|
|
* @stats_percpu: Per-CPU datapath statistics.
|
2010-01-08 16:44:43 -08:00
|
|
|
* @sflow_probability: Number of packets out of UINT_MAX to sample to the
|
2011-01-26 13:41:54 -08:00
|
|
|
* %ODP_PACKET_CMD_SAMPLE multicast group, e.g. (@sflow_probability/UINT_MAX)
|
|
|
|
* is the probability of sampling a given packet.
|
2011-01-26 12:49:06 -08:00
|
|
|
*
|
|
|
|
* Context: See the comment on locking at the top of datapath.c for additional
|
|
|
|
* locking information.
|
2010-01-04 13:08:37 -08:00
|
|
|
*/
|
2009-07-08 13:19:16 -07:00
|
|
|
struct datapath {
|
2011-01-05 12:39:57 -08:00
|
|
|
struct rcu_head rcu;
|
2011-01-21 17:01:56 -08:00
|
|
|
int dp_ifindex;
|
|
|
|
struct list_head list_node;
|
2009-07-08 13:19:16 -07:00
|
|
|
struct kobject ifobj;
|
|
|
|
|
|
|
|
int drop_frags;
|
|
|
|
|
|
|
|
/* Flow table. */
|
2010-12-04 11:50:53 -08:00
|
|
|
struct tbl __rcu *table;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
/* Switch ports. */
|
2010-12-04 11:50:53 -08:00
|
|
|
struct vport __rcu *ports[DP_MAX_PORTS];
|
2010-01-04 13:08:37 -08:00
|
|
|
struct list_head port_list;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
/* Stats. */
|
2010-12-04 11:39:53 -08:00
|
|
|
struct dp_stats_percpu __percpu *stats_percpu;
|
2010-01-04 13:08:37 -08:00
|
|
|
|
|
|
|
/* sFlow Sampling */
|
|
|
|
unsigned int sflow_probability;
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
|
|
|
|
2010-02-28 12:17:16 -05:00
|
|
|
/**
|
|
|
|
* struct ovs_skb_cb - OVS data in skb CB
|
2010-12-03 13:09:26 -08:00
|
|
|
* @vport: The datapath port on which the skb entered the switch.
|
2010-08-29 10:49:11 -07:00
|
|
|
* @flow: The flow associated with this packet. May be %NULL if no flow.
|
2010-02-28 12:17:16 -05:00
|
|
|
* @ip_summed: Consistently stores L4 checksumming status across different
|
|
|
|
* kernel versions.
|
2010-12-10 10:42:42 -08:00
|
|
|
* @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the
|
|
|
|
* packet was not received on a tunnel.
|
2010-12-30 20:48:38 -08:00
|
|
|
* @vlan_tci: Provides a substitute for the skb->vlan_tci field on kernels
|
|
|
|
* before 2.6.27.
|
2010-02-28 12:17:16 -05:00
|
|
|
*/
|
|
|
|
struct ovs_skb_cb {
|
2010-12-03 13:09:26 -08:00
|
|
|
struct vport *vport;
|
2010-08-29 10:49:11 -07:00
|
|
|
struct sw_flow *flow;
|
2010-11-22 14:17:24 -08:00
|
|
|
#ifdef NEED_CSUM_NORMALIZE
|
2010-04-12 11:49:16 -04:00
|
|
|
enum csum_type ip_summed;
|
2010-11-22 14:17:24 -08:00
|
|
|
#endif
|
2010-12-10 10:42:42 -08:00
|
|
|
__be64 tun_id;
|
2010-12-30 20:48:38 -08:00
|
|
|
#ifdef NEED_VLAN_FIELD
|
|
|
|
u16 vlan_tci;
|
|
|
|
#endif
|
2010-02-28 12:17:16 -05:00
|
|
|
};
|
|
|
|
/* Views the cb member of 'skb' as a pointer to struct ovs_skb_cb. */
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
|
|
|
|
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
/**
|
|
|
|
* struct dp_upcall - metadata to include with a packet to send to userspace
|
2011-01-26 13:41:54 -08:00
|
|
|
* @cmd: One of %ODP_PACKET_CMD_*.
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
* @key: Becomes %ODP_PACKET_ATTR_KEY. Must be nonnull.
|
|
|
|
* @userdata: Becomes %ODP_PACKET_ATTR_USERDATA if nonzero.
|
|
|
|
* @sample_pool: Becomes %ODP_PACKET_ATTR_SAMPLE_POOL if nonzero.
|
|
|
|
* @actions: Becomes %ODP_PACKET_ATTR_ACTIONS if nonnull.
|
|
|
|
* @actions_len: Number of bytes in @actions.
|
|
|
|
*/
|
|
|
|
struct dp_upcall_info {
|
2011-01-26 13:41:54 -08:00
|
|
|
u8 cmd;
|
datapath: Report kernel's flow key when passing packets up to userspace.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace. This means that userspace can intelligently figure out
what to do:
- If userspace's notion of the flow for the packet matches the kernel's,
then nothing special is necessary.
- If the kernel has a more specific notion for the flow than userspace,
for example if the kernel decoded IPv6 headers but userspace stopped
at the Ethernet type (because it does not understand IPv6), then again
nothing special is necessary: userspace can still set up the flow in
the usual way.
- If userspace has a more specific notion for the flow than the kernel,
for example if userspace decoded an IPv6 header but the kernel
stopped at the Ethernet type, then userspace can forward the packet
manually, without setting up a flow in the kernel. (This case is
bad from a performance point of view, but at least it is correct.)
This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently. This will have to wait
for later commits.
This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes. The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.
This commit will slow down performance of checksumming packets sent up to
userspace. I'm not entirely pleased with how I did it. I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome. Not changing anything wasn't an option,
unfortunately. At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.
(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace. But that's not perfect either.)
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
|
|
|
const struct sw_flow_key *key;
|
|
|
|
u64 userdata;
|
|
|
|
u32 sample_pool;
|
|
|
|
const struct nlattr *actions;
|
|
|
|
u32 actions_len;
|
|
|
|
};
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
/* Notifier block defined outside this header.
 * NOTE(review): presumably registered for network device events -- confirm
 * at the definition. */
extern struct notifier_block dp_device_notifier;
|
|
|
|
/* Function-pointer hook taking a net device, an ifreq and a command number;
 * defined outside this header.
 * NOTE(review): who installs and who invokes the hook is not visible here. */
extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
|
|
|
|
|
2010-12-03 13:09:26 -08:00
|
|
|
/* Datapath entry points; all are implemented outside this header. */

/* Hands 'skb' to the datapath on behalf of 'vport'.
 * NOTE(review): presumably 'vport' is the port the packet arrived on. */
void dp_process_received_packet(struct vport *vport, struct sk_buff *skb);
int dp_detach_port(struct vport *vport);
/* NOTE(review): presumably passes 'skb' up to userspace together with the
 * metadata in 'upcall_info' -- see struct dp_upcall_info. */
int dp_upcall(struct datapath *dp, struct sk_buff *skb,
	      const struct dp_upcall_info *upcall_info);
int dp_min_mtu(const struct datapath *dp);
void set_internal_devs_mtu(const struct datapath *dp);

struct datapath *get_dp(int dp_idx);
const char *dp_name(const struct datapath *dp);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
#endif /* datapath.h */
|