One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independently of the other. To do this in full generality, it must be possible to add new features to the kernel vport layer without changing userspace software. The customary way to do this in the Linux networking stack is to use Netlink, and in particular Netlink attributes. This commit adopts that model for the vport layer. It does not yet actually start using the Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
/*
 * Copyright (c) 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#ifndef TUNNEL_H
#define TUNNEL_H 1

#include <linux/version.h>

#include "flow.h"
#include "openvswitch/tunnel.h"
#include "table.h"
#include "vport.h"

/*
 * The absolute minimum fragment size.  Note that there are many other
 * definitions of the minimum MTU.
 */
#define IP_MIN_MTU 68

/*
 * One of these goes in struct tnl_ops and in tnl_find_port().
 * These values are in the same namespace as other TNL_T_* values, so
 * only the least significant 10 bits are available to define protocol
 * identifiers.
 */
#define TNL_T_PROTO_GRE		0
#define TNL_T_PROTO_CAPWAP	1

/* These flags are only needed when calling tnl_find_port(). */
#define TNL_T_KEY_EXACT		(1 << 10)
#define TNL_T_KEY_MATCH		(1 << 11)
#define TNL_T_KEY_EITHER	(TNL_T_KEY_EXACT | TNL_T_KEY_MATCH)
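
/*
 * Illustrative sketch (not part of the original header): a tnl_find_port()
 * lookup type is composed by placing a TNL_T_PROTO_* identifier in the low
 * 10 bits and OR'ing a TNL_T_KEY_* matching policy above it. The helper
 * name below is hypothetical.
 */
#if 0
static inline u32 tnl_example_lookup_type(u32 proto, u32 key_policy)
{
	/* proto: one of TNL_T_PROTO_*; key_policy: a TNL_T_KEY_* flag. */
	return proto | key_policy;
}

/* e.g. tnl_example_lookup_type(TNL_T_PROTO_GRE, TNL_T_KEY_EITHER) */
#endif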

/* Private flags not exposed to userspace in this form. */
#define TNL_F_IN_KEY_MATCH	(1 << 16) /* Store the key in tun_id to match in the flow table. */
#define TNL_F_OUT_KEY_ACTION	(1 << 17) /* Get the key from a SET_TUNNEL action. */

/* All public tunnel flags. */
#define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \
		      TNL_F_PMTUD | TNL_F_HDR_CACHE | TNL_F_IPSEC)

/**
 * struct tnl_mutable_config - modifiable configuration for a tunnel.
 * @rcu: RCU callback head for deferred destruction.
 * @seq: Sequence number for distinguishing configuration versions.
 * @tunnel_type: Set of TNL_T_* flags that define lookup.
 * @tunnel_hlen: Tunnel header length.
 * @eth_addr: Source address for packets generated by the tunnel itself
 * (e.g. ICMP fragmentation needed messages).
 * @mtu: MTU of tunnel.
 * @in_key: Key to match on input, 0 for wildcard.
 * @out_key: Key to use on output, 0 if this tunnel has no fixed output key.
 * @flags: TNL_F_* flags.
 * @saddr: IPv4 source address to match, 0 to accept any source address.
 * @daddr: IPv4 destination of tunnel.
 * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS.
 * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL.
 */
struct tnl_mutable_config {
	struct rcu_head rcu;

	unsigned seq;

	u32 tunnel_type;
	unsigned tunnel_hlen;

	unsigned char eth_addr[ETH_ALEN];
	unsigned mtu;

	/* Configured via ODP_TUNNEL_ATTR_* attributes. */
	__be64	in_key;
	__be64	out_key;
	u32	flags;
	__be32	saddr;
	__be32	daddr;
	u8	tos;
	u8	ttl;
};
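
/*
 * Illustrative sketch (not part of the original header) of the RCU update
 * discipline implied by @rcu and @seq: readers dereference the current
 * configuration under rcu_read_lock(), while a writer publishes a new copy
 * and defers freeing the old one until a grace period has elapsed. The
 * function and the free callback are hypothetical; the real update path
 * lives in tunnel.c.
 */
#if 0
static void example_free_config_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct tnl_mutable_config, rcu));
}

static void example_update_config(struct tnl_vport *tnl_vport,
				  struct tnl_mutable_config *new_config)
{
	struct tnl_mutable_config *old_config;

	old_config = rtnl_dereference(tnl_vport->mutable);
	new_config->seq = old_config->seq + 1;	/* New configuration version. */
	rcu_assign_pointer(tnl_vport->mutable, new_config);
	call_rcu(&old_config->rcu, example_free_config_rcu);
}
#endif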

struct tnl_ops {
	u32 tunnel_type;	/* Put the TNL_T_PROTO_* type in here. */
	u8 ipproto;		/* The IP protocol for the tunnel. */

	/*
	 * Returns the length of the tunnel header that will be added in
	 * build_header() (i.e. excludes the IP header). Returns a negative
	 * error code if the configuration is invalid.
	 */
	int (*hdr_len)(const struct tnl_mutable_config *);

	/*
	 * Builds the static portion of the tunnel header, which is stored in
	 * the header cache. In general the performance of this function is
	 * not too important, as we try to call it only when building the
	 * cache, so it is preferable to shift as much work as possible here.
	 * However, in some circumstances caching is disabled and this
	 * function will be called for every packet, so try not to make it
	 * too slow.
	 */
	void (*build_header)(const struct vport *,
			     const struct tnl_mutable_config *, void *header);

	/*
	 * Updates the cached header of a packet to match the actual packet
	 * data. Typical things that might need to be updated are length,
	 * checksum, etc. The IP header will have already been updated and
	 * this is the final step before transmission. Returns a linked list
	 * of completed SKBs (multiple packets may be generated in the event
	 * of fragmentation).
	 */
	struct sk_buff *(*update_header)(const struct vport *,
					 const struct tnl_mutable_config *,
					 struct dst_entry *, struct sk_buff *);
};
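
/*
 * Illustrative sketch (not part of the original header) of how a tunnel
 * protocol module might fill in struct tnl_ops. The gre_* callbacks named
 * here are hypothetical stand-ins for a real implementation such as the
 * GRE vport's.
 */
#if 0
static const struct tnl_ops gre_tnl_ops = {
	.tunnel_type	= TNL_T_PROTO_GRE,
	.ipproto	= IPPROTO_GRE,
	.hdr_len	= gre_hdr_len,		/* GRE header size for this config. */
	.build_header	= gre_build_header,	/* Fill in the static GRE header. */
	.update_header	= gre_update_header,	/* Per-packet fixups before xmit. */
};
#endif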

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
/*
 * On these kernels we have a fast mechanism to tell if the ARP cache for a
 * particular destination has changed.
 */
#define HAVE_HH_SEQ
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
/*
 * On these kernels we have a fast mechanism to tell if the routing table
 * has changed.
 */
#define HAVE_RT_GENID
#endif
#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
/* If we can't detect all system changes directly we need to use a timeout. */
#define NEED_CACHE_TIMEOUT
#endif

struct tnl_cache {
	struct rcu_head rcu;

	int len;		/* Length of data to be memcpy'd from cache. */

	/* Sequence number of mutable->seq from which this cache was generated. */
	unsigned mutable_seq;

#ifdef HAVE_HH_SEQ
	/*
	 * The sequence number from the seqlock protecting the hardware header
	 * cache (in the ARP cache). Since every write increments the counter,
	 * this gives us an easy way to tell if it has changed.
	 */
	unsigned hh_seq;
#endif

#ifdef NEED_CACHE_TIMEOUT
	/*
	 * If we don't have direct mechanisms to detect all important changes
	 * in the system, fall back to an expiration time. This expiration
	 * time can be relatively short since at high rates there will be
	 * millions of packets per second, so we'll still get plenty of
	 * benefit from the cache. Note that if something changes we may
	 * blackhole packets until the expiration time (depending on what
	 * changed and the kernel version, we may be able to detect the
	 * change sooner). Expiration is expressed as a time in jiffies.
	 */
	unsigned long expiration;
#endif

	/*
	 * The routing table entry that is the result of looking up the tunnel
	 * endpoints. It also contains a sequence number (called a generation
	 * ID) that can be compared to a global sequence to tell if the
	 * routing table has changed (and therefore there is a potential that
	 * this cached route has been invalidated).
	 */
	struct rtable *rt;

	/*
	 * If the output device for tunnel traffic is an OVS internal device,
	 * the flow of that datapath. Since all tunnel traffic will have the
	 * same headers this allows us to cache the flow lookup. NULL if the
	 * output device is not OVS or if there is no flow installed.
	 */
	struct sw_flow *flow;

	/* The cached header follows after padding for alignment. */
};
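
/*
 * Illustrative sketch (not part of the original header) of how a cache
 * entry is judged valid and how the cached header stored after the struct
 * is reached. CACHE_DATA_ALIGN and both helpers are hypothetical; the real
 * logic lives in tunnel.c.
 */
#if 0
#define CACHE_DATA_ALIGN 16

static inline u8 *example_cached_header(const struct tnl_cache *cache)
{
	/* The header bytes start at the first aligned offset past the struct. */
	return (u8 *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
}

static inline bool example_cache_valid(const struct tnl_cache *cache,
				       const struct tnl_mutable_config *mutable)
{
	if (!cache || cache->mutable_seq != mutable->seq)
		return false;	/* Tunnel configuration has changed. */
#ifdef NEED_CACHE_TIMEOUT
	if (time_after(jiffies, cache->expiration))
		return false;	/* Conservative timeout has passed. */
#endif
	return true;
}
#endif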

struct tnl_vport {
	struct rcu_head rcu;
	struct tbl_node tbl_node;

	char name[IFNAMSIZ];
	const struct tnl_ops *tnl_ops;

	struct tnl_mutable_config __rcu *mutable;

	/*
	 * ID of the last fragment sent (for tunnel protocols with direct
	 * support for fragmentation). If the protocol relies on IP
	 * fragmentation then this is not needed.
	 */
	atomic_t frag_id;

	spinlock_t cache_lock;
	struct tnl_cache __rcu *cache;	/* Protected by RCU/cache_lock. */

#ifdef NEED_CACHE_TIMEOUT
	/*
	 * If we must rely on expiration time to invalidate the cache, this is
	 * the interval. It is randomized within a range (defined by
	 * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
	 * by the creation of a large number of tunnels at one time.
	 */
	unsigned long cache_exp_interval;
#endif
};
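
/*
 * Illustrative sketch (not part of the original header): protocols that
 * fragment at the tunnel layer can draw per-packet fragment IDs from
 * @frag_id with an atomic increment. The helper name is hypothetical.
 */
#if 0
static inline __be16 example_next_frag_id(struct tnl_vport *tnl_vport)
{
	/* atomic_inc_return() gives each caller a distinct counter value. */
	return htons((u16)atomic_inc_return(&tnl_vport->frag_id));
}
#endif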

struct vport *tnl_create(const struct vport_parms *, const struct vport_ops *,
			 const struct tnl_ops *);
int tnl_destroy(struct vport *);

int tnl_set_options(struct vport *, struct nlattr *);
int tnl_get_options(const struct vport *, struct sk_buff *);

int tnl_set_mtu(struct vport *vport, int mtu);
int tnl_set_addr(struct vport *vport, const unsigned char *addr);
const char *tnl_get_name(const struct vport *vport);
const unsigned char *tnl_get_addr(const struct vport *vport);
int tnl_get_mtu(const struct vport *vport);
int tnl_send(struct vport *vport, struct sk_buff *skb);
void tnl_rcv(struct vport *vport, struct sk_buff *skb);

struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
			    int tunnel_type,
			    const struct tnl_mutable_config **mutable);
bool tnl_frag_needed(struct vport *vport,
		     const struct tnl_mutable_config *mutable,
		     struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
void tnl_free_linked_skbs(struct sk_buff *skb);
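
/*
 * Illustrative sketch (not part of the original header) of the receive-path
 * pattern these declarations support: a protocol handler looks up the vport
 * for an incoming packet's addresses and key, then hands the packet to the
 * common tnl_rcv(). The handler and variable names are hypothetical, and
 * the real callers run under rcu_read_lock().
 */
#if 0
static void example_protocol_rcv(struct sk_buff *skb, __be32 saddr,
				 __be32 daddr, __be64 key)
{
	const struct tnl_mutable_config *mutable;
	struct vport *vport;

	vport = tnl_find_port(saddr, daddr, key,
			      TNL_T_PROTO_GRE | TNL_T_KEY_EITHER, &mutable);
	if (!vport) {
		kfree_skb(skb);		/* No tunnel port matches this packet. */
		return;
	}
	tnl_rcv(vport, skb);
}
#endif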

static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
	return vport_priv(vport);
}
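
/*
 * Illustrative sketch (not part of the original header): tnl_vport_priv()
 * returns the tunnel-private area that vport.h reserves after each struct
 * vport, so the inverse mapping falls out of the generic vport_from_priv(),
 * assuming that helper as declared in vport.h. The function name below is
 * hypothetical.
 */
#if 0
static inline struct vport *example_tnl_vport_to_vport(const struct tnl_vport *tnl_vport)
{
	return vport_from_priv(tnl_vport);	/* Inverse of tnl_vport_priv(). */
}
#endif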

#endif /* tunnel.h */