/*
 * Copyright (c) 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/if_vlan.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/in_route.h>
#include <linux/inetdevice.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>

#include <net/dsfield.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
#endif
#include <net/route.h>
#include <net/xfrm.h>

#include "actions.h"
#include "checksum.h"
#include "datapath.h"
#include "tunnel.h"
#include "vlan.h"
#include "vport.h"
#include "vport-generic.h"
#include "vport-internal_dev.h"

#ifdef NEED_CACHE_TIMEOUT
/*
 * On kernels where we can't quickly detect changes in the rest of the system
 * we use an expiration time to invalidate the cache.  A shorter expiration
 * reduces the length of time that we may potentially blackhole packets while
 * a longer time increases performance by reducing the frequency that the
 * cache needs to be rebuilt.  A variety of factors may cause the cache to be
 * invalidated before the expiration time but this is the maximum.  The time
 * is expressed in jiffies.
 */
#define MAX_CACHE_EXP HZ
#endif

/*
 * Interval to check for and remove caches that are no longer valid.  Caches
 * are checked for validity before they are used for packet encapsulation and
 * old caches are removed at that time.  However, if no packets are sent
 * through the tunnel then the cache will never be destroyed.  Since it holds
 * references to a number of system objects, the cache will continue to use
 * system resources by not allowing those objects to be destroyed.  The cache
 * cleaner is periodically run to free invalid caches.  It does not
 * significantly affect system performance.  A lower interval will release
 * resources faster but will itself consume resources by requiring more
 * frequent checks.  A longer interval may result in messages being printed
 * to the kernel message buffer about unreleased resources.  The interval is
 * expressed in jiffies.
 */
#define CACHE_CLEANER_INTERVAL (5 * HZ)

#define CACHE_DATA_ALIGN 16
#define PORT_TABLE_SIZE 1024

static struct hlist_head *port_table __read_mostly;
static int port_table_count;

static void cache_cleaner(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);

/*
 * These are just used as an optimization: they don't require any kind of
 * synchronization because we could have just as easily read the value before
 * the port change happened.
 */
static unsigned int key_local_remote_ports __read_mostly;
static unsigned int key_remote_ports __read_mostly;
static unsigned int local_remote_ports __read_mostly;
static unsigned int remote_ports __read_mostly;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
#else
#define rt_dst(rt) (rt->u.dst)
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
static struct hh_cache *rt_hh(struct rtable *rt)
{
	struct neighbour *neigh = dst_get_neighbour(&rt->dst);
	if (!neigh || !(neigh->nud_state & NUD_CONNECTED) ||
			!neigh->hh.hh_len)
		return NULL;
	return &neigh->hh;
}
#else
#define rt_hh(rt) (rt_dst(rt).hh)
#endif

static inline struct vport *tnl_vport_to_vport(const struct tnl_vport *tnl_vport)
{
	return vport_from_priv(tnl_vport);
}

/* This is analogous to rtnl_dereference for the tunnel cache.  It checks that
 * cache_lock is held, so it is only for update side code.
 */
static inline struct tnl_cache *cache_dereference(struct tnl_vport *tnl_vport)
{
	return rcu_dereference_protected(tnl_vport->cache,
					 lockdep_is_held(&tnl_vport->cache_lock));
}

static inline void schedule_cache_cleaner(void)
{
	schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL);
}

static void free_cache(struct tnl_cache *cache)
{
	if (!cache)
		return;

	flow_put(cache->flow);
	ip_rt_put(cache->rt);
	kfree(cache);
}

static void free_config_rcu(struct rcu_head *rcu)
{
	struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu);
	kfree(c);
}

static void free_cache_rcu(struct rcu_head *rcu)
{
	struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu);
	free_cache(c);
}

/* Frees the portion of 'mutable' that requires RTNL and thus can't happen
 * within an RCU callback.  Fortunately this part doesn't require waiting for
 * an RCU grace period.
 */
static void free_mutable_rtnl(struct tnl_mutable_config *mutable)
{
	ASSERT_RTNL();
	if (ipv4_is_multicast(mutable->key.daddr) && mutable->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(&init_net, mutable->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, mutable->key.daddr);
	}
}

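/* Publish a new mutable configuration for a tunnel vport.  Readers see either
 * the complete old or complete new configuration via RCU; the RTNL-only parts
 * of the old configuration are released immediately and the structure itself
 * is freed after a grace period.
 */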
static void assign_config_rcu(struct vport *vport,
			      struct tnl_mutable_config *new_config)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *old_config;

	old_config = rtnl_dereference(tnl_vport->mutable);
	rcu_assign_pointer(tnl_vport->mutable, new_config);

	free_mutable_rtnl(old_config);
	call_rcu(&old_config->rcu, free_config_rcu);
}

static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *old_cache;

	old_cache = cache_dereference(tnl_vport);
	rcu_assign_pointer(tnl_vport->cache, new_cache);

	if (old_cache)
		call_rcu(&old_cache->rcu, free_cache_rcu);
}

static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
{
	if (mutable->flags & TNL_F_IN_KEY_MATCH) {
		if (mutable->key.saddr)
			return &local_remote_ports;
		else
			return &remote_ports;
	} else {
		if (mutable->key.saddr)
			return &key_local_remote_ports;
		else
			return &key_remote_ports;
	}
}

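/* Tunnel vports live in a fixed-size hash table keyed by the full
 * port_lookup_key.  port_hash() folds the key with jhash2() (PORT_KEY_LEN is
 * expected to be a multiple of sizeof(u32)) and find_bucket() masks the
 * result, which relies on PORT_TABLE_SIZE being a power of two.
 */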
static u32 port_hash(const struct port_lookup_key *key)
{
	return jhash2((u32*)key, (PORT_KEY_LEN / sizeof(u32)), 0);
}

static inline struct hlist_head *find_bucket(u32 hash)
{
	return &port_table[(hash & (PORT_TABLE_SIZE - 1))];
}

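/* The helpers below add, rehash, and remove vports in the port table.  They
 * run under RTNL (note the rtnl_dereference() calls) and keep the per-pool
 * lookup counters and the cache cleaner work item in step with the number of
 * ports in the table.
 */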
static void port_table_add_port(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	const struct tnl_mutable_config *mutable;
	u32 hash;

	if (port_table_count == 0)
		schedule_cache_cleaner();

	mutable = rtnl_dereference(tnl_vport->mutable);
	hash = port_hash(&mutable->key);
	hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(hash));
	port_table_count++;

	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;
}

static void port_table_move_port(struct vport *vport,
				 struct tnl_mutable_config *new_mutable)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	u32 hash;

	hash = port_hash(&new_mutable->key);
	hlist_del_init_rcu(&tnl_vport->hash_node);
	hlist_add_head_rcu(&tnl_vport->hash_node, find_bucket(hash));

	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
	assign_config_rcu(vport, new_mutable);
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;
}

static void port_table_remove_port(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);

	hlist_del_init_rcu(&tnl_vport->hash_node);

	port_table_count--;
	if (port_table_count == 0)
		cancel_delayed_work_sync(&cache_cleaner_wq);

	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
}

static struct vport *port_table_lookup(struct port_lookup_key *key,
				       const struct tnl_mutable_config **pmutable)
{
	struct hlist_node *n;
	struct hlist_head *bucket;
	u32 hash = port_hash(key);
	struct tnl_vport *tnl_vport;

	bucket = find_bucket(hash);

	hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node) {
		struct tnl_mutable_config *mutable;

		mutable = rcu_dereference_rtnl(tnl_vport->mutable);
		if (!memcmp(&mutable->key, key, PORT_KEY_LEN)) {
			*pmutable = mutable;
			return tnl_vport_to_vport(tnl_vport);
		}
	}

	return NULL;
}

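/* Find the tunnel vport that matches the given addresses and key.  A
 * multicast 'saddr' is looked up as the configured destination with a
 * wildcarded source.  Otherwise the table is probed from most to least
 * specific: exact in_key with both addresses, exact in_key with the remote
 * address only, then the same two lookups with the in_key wildcarded.  The
 * global per-category counters let empty categories be skipped entirely.
 */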
struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
			    int tunnel_type,
			    const struct tnl_mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct vport *vport;

	if (ipv4_is_multicast(saddr)) {
		lookup.saddr = 0;
		lookup.daddr = saddr;
		if (key_remote_ports) {
			lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
			lookup.in_key = key;
			vport = port_table_lookup(&lookup, mutable);
			if (vport)
				return vport;
		}
		if (remote_ports) {
			lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
			lookup.in_key = 0;
			vport = port_table_lookup(&lookup, mutable);
			if (vport)
				return vport;
		}
		return NULL;
	}

	lookup.saddr = saddr;
	lookup.daddr = daddr;

	/* First try for exact match on in_key. */
	lookup.in_key = key;
	lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
	if (key_local_remote_ports) {
		vport = port_table_lookup(&lookup, mutable);
		if (vport)
			return vport;
	}
	if (key_remote_ports) {
		lookup.saddr = 0;
		vport = port_table_lookup(&lookup, mutable);
		if (vport)
			return vport;

		lookup.saddr = saddr;
	}

	/* Then try matches that wildcard in_key. */
	lookup.in_key = 0;
	lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
	if (local_remote_ports) {
		vport = port_table_lookup(&lookup, mutable);
		if (vport)
			return vport;
	}
	if (remote_ports) {
		lookup.saddr = 0;
		vport = port_table_lookup(&lookup, mutable);
		if (vport)
			return vport;
	}

	return NULL;
}

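/* Propagate ECN from the outer header: if the encapsulating IP header was
 * marked Congestion Experienced, set CE on the inner IPv4/IPv6 header as
 * well, looking through one VLAN tag if necessary.
 */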
static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
{
	if (unlikely(INET_ECN_is_ce(tos))) {
		__be16 protocol = skb->protocol;

		skb_set_network_header(skb, ETH_HLEN);

		if (protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			skb_set_network_header(skb, VLAN_ETH_HLEN);
		}

		if (protocol == htons(ETH_P_IP)) {
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce(ip_hdr(skb));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
#endif
	}
}

/**
 * tnl_rcv - ingress point for generic tunnel code
 *
 * @vport: port this packet was received on
 * @skb: received packet
 * @tos: ToS from encapsulating IP packet, used to copy ECN bits
 *
 * Must be called with rcu_read_lock.
 *
 * Packets received by this function are in the following state:
 * - skb->data points to the inner Ethernet header.
 * - The inner Ethernet header is in the linear data area.
 * - skb->csum does not include the inner Ethernet header.
 * - The layer pointers are undefined.
 */
void tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
{
	struct ethhdr *eh;

	skb_reset_mac_header(skb);
	eh = eth_hdr(skb);

	if (likely(ntohs(eh->h_proto) >= 1536))
		skb->protocol = eh->h_proto;
	else
		skb->protocol = htons(ETH_P_802_2);

	skb_dst_drop(skb);
	nf_reset(skb);
	skb_clear_rxhash(skb);
	secpath_reset(skb);

	ecn_decapsulate(skb, tos);
	vlan_set_tci(skb, 0);

	if (unlikely(compute_ip_summed(skb, false))) {
		kfree_skb(skb);
		return;
	}

	vport_receive(vport, skb);
}

static bool check_ipv4_address(__be32 addr)
{
	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
	    || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
		return false;

	return true;
}

static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		if (*icmp_typep > NR_ICMP_TYPES
			|| (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}

static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							 icmp6_type),
						sizeof(icmp_type), &icmp_type);

		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}

static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6 */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
#endif /* IPv6 */

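/* Synthesize an ICMP "fragmentation needed" (IPv4) or ICMPv6 "packet too big"
 * reply for an over-MTU packet and feed it back to the datapath as if it had
 * been received on 'vport'.  Returns true if the caller should drop the
 * original packet (a reply was generated or none is appropriate); returns
 * false if PMTUD cannot be applied and normal processing should continue.
 */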
bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable,
		     struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/*
		 * In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment.
		 */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		total_length = min_t(unsigned int, header_length +
						   payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
						   payload_length, IPV6_MIN_MTU);
	}
#endif

	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	} else
		vlan_set_tci(nskb, vlan_get_tci(skb));
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/*
	 * Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet.  If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets.
	 */
	if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
	    (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
		OVS_CB(nskb)->tun_id = flow_key;

	if (unlikely(compute_ip_summed(nskb, false))) {
		kfree_skb(nskb);
		return false;
	}

	vport_receive(vport, nskb);

	return true;
}

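/* Decide how the outer DF bit should be set and, when path MTU discovery is
 * enabled, compare the packet against the tunnel MTU.  Returns false if the
 * packet exceeds the MTU and a PMTUD reply was generated, in which case the
 * caller drops it; otherwise returns true with *frag_offp set to the DF value
 * to use in the outer header.
 */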
static bool check_mtu(struct sk_buff *skb,
		      struct vport *vport,
		      const struct tnl_mutable_config *mutable,
		      const struct rtable *rt, __be16 *frag_offp)
{
	bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
	bool pmtud = mutable->flags & TNL_F_PMTUD;
	__be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
	int mtu = 0;
	unsigned int packet_length = skb->len - ETH_HLEN;

	/* Allow for one level of tagging in the packet length. */
	if (!vlan_tx_tag_present(skb) &&
	    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
		packet_length -= VLAN_HLEN;

	if (pmtud) {
		int vlan_header = 0;

		/* The tag needs to go in packet regardless of where it
		 * currently is, so subtract it from the MTU.
		 */
		if (vlan_tx_tag_present(skb) ||
		    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
			vlan_header = VLAN_HLEN;

		mtu = dst_mtu(&rt_dst(rt))
			- ETH_HLEN
			- mutable->tunnel_hlen
			- vlan_header;
	}

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);

		if (df_inherit)
			frag_off = iph->frag_off & htons(IP_DF);

		if (pmtud && iph->frag_off & htons(IP_DF)) {
			mtu = max(mtu, IP_MIN_MTU);

			if (packet_length > mtu &&
			    tnl_frag_needed(vport, mutable, skb, mtu,
					    OVS_CB(skb)->tun_id))
				return false;
		}
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		/* IPv6 requires end hosts to do fragmentation
		 * if the packet is above the minimum MTU.
		 */
		if (df_inherit && packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (pmtud) {
			mtu = max(mtu, IPV6_MIN_MTU);

			if (packet_length > mtu &&
			    tnl_frag_needed(vport, mutable, skb, mtu,
					    OVS_CB(skb)->tun_id))
				return false;
		}
	}
#endif

	*frag_offp = frag_off;
	return true;
}

static void create_tunnel_header(const struct vport *vport,
				 const struct tnl_mutable_config *mutable,
				 const struct rtable *rt, void *header)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct iphdr *iph = header;

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = htons(IP_DF);
	iph->protocol = tnl_vport->tnl_ops->ipproto;
	iph->tos = mutable->tos;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->ttl = mutable->ttl;
	if (!iph->ttl)
		iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));

	tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}

static inline void *get_cached_header(const struct tnl_cache *cache)
{
	return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
}

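/* A cached header is usable only while everything it was derived from is
 * still current: the entry exists, it has not expired (where a timeout is
 * needed), the IPv4 routing generation and the neighbour's cached L2 header
 * are unchanged, the port's configuration sequence number matches, and any
 * cached flow for an internal device is still alive.
 */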
static inline bool check_cache_valid(const struct tnl_cache *cache,
				     const struct tnl_mutable_config *mutable)
{
	struct hh_cache *hh;

	if (!cache)
		return false;

	hh = rt_hh(cache->rt);
	return hh &&
#ifdef NEED_CACHE_TIMEOUT
		time_before(jiffies, cache->expiration) &&
#endif
#ifdef HAVE_RT_GENID
		atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
#endif
#ifdef HAVE_HH_SEQ
		hh->hh_lock.sequence == cache->hh_seq &&
#endif
		mutable->seq == cache->mutable_seq &&
		(!is_internal_dev(rt_dst(cache->rt).dev) ||
		(cache->flow && !cache->flow->dead));
}

static void __cache_cleaner(struct tnl_vport *tnl_vport)
{
	const struct tnl_mutable_config *mutable =
			rcu_dereference(tnl_vport->mutable);
	const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache);

	if (cache && !check_cache_valid(cache, mutable) &&
	    spin_trylock_bh(&tnl_vport->cache_lock)) {
		assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL);
		spin_unlock_bh(&tnl_vport->cache_lock);
	}
}

static void cache_cleaner(struct work_struct *work)
{
	int i;

	schedule_cache_cleaner();

	rcu_read_lock();
	for (i = 0; i < PORT_TABLE_SIZE; i++) {
		struct hlist_node *n;
		struct hlist_head *bucket;
		struct tnl_vport *tnl_vport;

		bucket = &port_table[i];
		hlist_for_each_entry_rcu(tnl_vport, n, bucket, hash_node)
			__cache_cleaner(tnl_vport);
	}
	rcu_read_unlock();
}

static inline void create_eth_hdr(struct tnl_cache *cache,
				  struct hh_cache *hh)
{
	void *cache_data = get_cached_header(cache);
	int hh_off;

#ifdef HAVE_HH_SEQ
	unsigned hh_seq;

	do {
		hh_seq = read_seqbegin(&hh->hh_lock);
		hh_off = HH_DATA_ALIGN(hh->hh_len) - hh->hh_len;
		memcpy(cache_data, (void *)hh->hh_data + hh_off, hh->hh_len);
		cache->hh_len = hh->hh_len;
	} while (read_seqretry(&hh->hh_lock, hh_seq));

	cache->hh_seq = hh_seq;
#else
	read_lock(&hh->hh_lock);
	hh_off = HH_DATA_ALIGN(hh->hh_len) - hh->hh_len;
	memcpy(cache_data, (void *)hh->hh_data + hh_off, hh->hh_len);
	cache->hh_len = hh->hh_len;
	read_unlock(&hh->hh_lock);
#endif
}

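/* Build a header cache entry for this vport: a precomputed L2 header taken
 * from the neighbour cache followed by the outer IP/tunnel header.  If the
 * route points at an internal device, the matching flow is also looked up and
 * cached so cached transmits can be handed straight to that datapath.
 * Returns NULL (leaving the slow path to do the work) if caching is disabled,
 * no L2 header is available, or the cache lock is contended.
 */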
static struct tnl_cache *build_cache(struct vport *vport,
				     const struct tnl_mutable_config *mutable,
				     struct rtable *rt)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *cache;
	void *cache_data;
	int cache_len;
	struct hh_cache *hh;

	if (!(mutable->flags & TNL_F_HDR_CACHE))
		return NULL;

	/*
	 * If there is no entry in the ARP cache or if this device does not
	 * support hard header caching just fall back to the IP stack.
	 */

	hh = rt_hh(rt);
	if (!hh)
		return NULL;

	/*
	 * If lock is contended fall back to directly building the header.
	 * We're not going to help performance by sitting here spinning.
	 */
	if (!spin_trylock(&tnl_vport->cache_lock))
		return NULL;

	cache = cache_dereference(tnl_vport);
	if (check_cache_valid(cache, mutable))
		goto unlock;
	else
		cache = NULL;

	cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + mutable->tunnel_hlen;

	cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
			cache_len, GFP_ATOMIC);
	if (!cache)
		goto unlock;

	create_eth_hdr(cache, hh);
	cache_data = get_cached_header(cache) + cache->hh_len;
	cache->len = cache->hh_len + mutable->tunnel_hlen;

	create_tunnel_header(vport, mutable, rt, cache_data);

	cache->mutable_seq = mutable->seq;
	cache->rt = rt;
#ifdef NEED_CACHE_TIMEOUT
	cache->expiration = jiffies + tnl_vport->cache_exp_interval;
#endif

	if (is_internal_dev(rt_dst(rt).dev)) {
		struct sw_flow_key flow_key;
		struct vport *dst_vport;
		struct sk_buff *skb;
		int err;
		int flow_key_len;
		struct sw_flow *flow;

		dst_vport = internal_dev_get_vport(rt_dst(rt).dev);
		if (!dst_vport)
			goto done;

		skb = alloc_skb(cache->len, GFP_ATOMIC);
		if (!skb)
			goto done;

		__skb_put(skb, cache->len);
		memcpy(skb->data, get_cached_header(cache), cache->len);

		err = flow_extract(skb, dst_vport->port_no, &flow_key,
				   &flow_key_len);

		consume_skb(skb);
		if (err)
			goto done;

		flow = flow_tbl_lookup(rcu_dereference(dst_vport->dp->table),
				       &flow_key, flow_key_len);
		if (flow) {
			cache->flow = flow;
			flow_hold(flow);
		}
	}

done:
	assign_cache_rcu(vport, cache);

unlock:
	spin_unlock(&tnl_vport->cache_lock);

	return cache;
}

static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
				   u8 ipproto, u8 tos)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
	struct flowi fl = { .nl_u = { .ip4_u =
					{ .daddr = mutable->key.daddr,
					  .saddr = mutable->key.saddr,
					  .tos = tos } },
			    .proto = ipproto };
	struct rtable *rt;

	if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
		return ERR_PTR(-EADDRNOTAVAIL);

	return rt;
#else
	struct flowi4 fl = { .daddr = mutable->key.daddr,
			     .saddr = mutable->key.saddr,
			     .flowi4_tos = tos,
			     .flowi4_proto = ipproto };

	return ip_route_output_key(&init_net, &fl);
#endif
}

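/* Return a route for the tunnel destination.  If the header cache is valid
 * and the ToS matches the configured value, the cached route is reused and
 * *cache is set; otherwise a fresh route lookup is done and, when possible, a
 * new cache entry is built for it.
 */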
static struct rtable *find_route(struct vport *vport,
				 const struct tnl_mutable_config *mutable,
				 u8 tos, struct tnl_cache **cache)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);

	*cache = NULL;
	tos = RT_TOS(tos);

	if (likely(tos == mutable->tos && check_cache_valid(cur_cache, mutable))) {
		*cache = cur_cache;
		return cur_cache->rt;
	} else {
		struct rtable *rt;

		rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos);
		if (IS_ERR(rt))
			return NULL;

		if (likely(tos == mutable->tos))
			*cache = build_cache(vport, mutable, rt);

		return rt;
	}
}

static inline bool need_linearize(const struct sk_buff *skb)
{
	int i;

	if (unlikely(skb_shinfo(skb)->frag_list))
		return true;

	/*
	 * Generally speaking we should linearize if there are paged frags.
	 * However, if all of the refcounts are 1 we know nobody else can
	 * change them from underneath us and we can skip the linearization.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
			return true;

	return false;
}

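/* Prepare an skb for encapsulation: make sure there is enough headroom for
 * the outer headers, segment GSO packets in software, and resolve any pending
 * partial checksum (linearizing first if shared pages could change under us).
 * May return a list of segments linked through skb->next, or an ERR_PTR on
 * failure.
 */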
static struct sk_buff *handle_offloads(struct sk_buff *skb,
				       const struct tnl_mutable_config *mutable,
				       const struct rtable *rt)
{
	int min_headroom;
	int err;

	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
			+ mutable->tunnel_hlen
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto error_free;
	}

	forward_ip_summed(skb, true);

	if (skb_is_gso(skb)) {
		struct sk_buff *nskb;

		nskb = skb_gso_segment(skb, 0);
		if (IS_ERR(nskb)) {
			kfree_skb(skb);
			err = PTR_ERR(nskb);
			goto error;
		}

		consume_skb(skb);
		skb = nskb;
	} else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
		/* Pages aren't locked and could change at any time.
		 * If this happens after we compute the checksum, the
		 * checksum will be wrong.  We linearize now to avoid
		 * this problem.
		 */
		if (unlikely(need_linearize(skb))) {
			err = __skb_linearize(skb);
			if (unlikely(err))
				goto error_free;
		}

		err = skb_checksum_help(skb);
		if (unlikely(err))
			goto error_free;
	}

	set_ip_summed(skb, OVS_CSUM_NONE);

	return skb;

error_free:
	kfree_skb(skb);
error:
	return ERR_PTR(err);
}

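/* Transmit a chain of already-encapsulated packets through the IP stack with
 * ip_local_out().  Returns the number of payload bytes (excluding the tunnel
 * header) handed off; once one packet is dropped the remainder of the chain
 * is freed rather than sent.
 */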
static int send_frags(struct sk_buff *skb,
		      const struct tnl_mutable_config *mutable)
{
	int sent_len;

	sent_len = 0;
	while (skb) {
		struct sk_buff *next = skb->next;
		int frag_len = skb->len - mutable->tunnel_hlen;
		int err;

		skb->next = NULL;
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

		err = ip_local_out(skb);
		skb = next;
		if (unlikely(net_xmit_eval(err)))
			goto free_frags;
		sent_len += frag_len;
	}

	return sent_len;

free_frags:
	/*
	 * There's no point in continuing to send fragments once one has been
	 * dropped so just free the rest.  This may help improve the congestion
	 * that caused the first packet to be dropped.
	 */
	tnl_free_linked_skbs(skb);
	return sent_len;
}

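/* Transmit path for all tunnel vports.  The stages are: validate the inner
 * protocol headers, compute the outer ToS (optionally inherited, with ECN
 * encapsulation), look up a route (preferring the header cache), fix up
 * offloads, enforce the MTU/DF policy, then prepend the outer header.  A
 * cached header either loops the packet back into a local internal device or
 * goes straight to dev_queue_xmit(); otherwise the packet is sent through
 * ip_local_out() via send_frags().  Returns the number of payload bytes
 * successfully sent, or 0 on failure.
 */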
int tnl_send(struct vport *vport, struct sk_buff *skb)
|
|
|
|
|
{
|
|
|
|
|
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
|
|
|
|
|
const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
|
|
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
enum vport_err_type err = VPORT_E_TX_ERROR;
|
2010-08-10 20:11:48 -04:00
|
|
|
struct rtable *rt;
|
2010-08-27 13:55:02 -07:00
|
|
|
struct dst_entry *unattached_dst = NULL;
|
|
|
|
|
struct tnl_cache *cache;
|
|
|
|
|
int sent_len = 0;
|
2011-01-27 18:16:07 -08:00
|
|
|
__be16 frag_off = 0;
|
2010-08-27 13:55:02 -07:00
|
|
|
u8 ttl;
|
|
|
|
|
u8 inner_tos;
|
|
|
|
|
u8 tos;
|
2010-08-10 20:11:48 -04:00
|
|
|
|
|
|
|
|
/* Validate the protocol headers before we try to use them. */
|
2010-12-29 22:13:15 -08:00
|
|
|
if (skb->protocol == htons(ETH_P_8021Q) &&
|
|
|
|
|
!vlan_tx_tag_present(skb)) {
|
2010-08-10 20:11:48 -04:00
|
|
|
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
|
|
|
|
|
goto error_free;
|
|
|
|
|
|
|
|
|
|
skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
|
|
|
|
|
skb_set_network_header(skb, VLAN_ETH_HLEN);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (skb->protocol == htons(ETH_P_IP)) {
|
2010-08-27 13:55:02 -07:00
|
|
|
if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
|
|
|
|
|
+ sizeof(struct iphdr))))
|
2010-08-10 20:11:48 -04:00
|
|
|
skb->protocol = 0;
|
|
|
|
|
}
|
|
|
|
|
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
|
|
|
|
else if (skb->protocol == htons(ETH_P_IPV6)) {
|
2010-08-27 13:55:02 -07:00
|
|
|
if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
|
|
|
|
|
+ sizeof(struct ipv6hdr))))
|
2010-08-10 20:11:48 -04:00
|
|
|
skb->protocol = 0;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
/* ToS */
|
|
|
|
|
if (skb->protocol == htons(ETH_P_IP))
|
|
|
|
|
inner_tos = ip_hdr(skb)->tos;
|
2010-08-10 20:11:48 -04:00
|
|
|
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
2010-08-27 13:55:02 -07:00
|
|
|
else if (skb->protocol == htons(ETH_P_IPV6))
|
|
|
|
|
inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
|
2010-08-10 20:11:48 -04:00
|
|
|
#endif
|
2010-08-27 13:55:02 -07:00
|
|
|
else
|
|
|
|
|
inner_tos = 0;
|
2010-08-10 20:11:48 -04:00
|
|
|
|
2011-01-26 12:28:59 -08:00
|
|
|
if (mutable->flags & TNL_F_TOS_INHERIT)
|
2010-08-27 13:55:02 -07:00
|
|
|
tos = inner_tos;
|
|
|
|
|
else
|
2011-01-26 12:28:59 -08:00
|
|
|
tos = mutable->tos;
|
2010-08-10 20:11:48 -04:00
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
tos = INET_ECN_encapsulate(tos, inner_tos);
|
|
|
|
|
|
|
|
|
|
/* Route lookup */
|
|
|
|
|
rt = find_route(vport, mutable, tos, &cache);
|
|
|
|
|
if (unlikely(!rt))
|
|
|
|
|
goto error_free;
|
|
|
|
|
if (unlikely(!cache))
|
|
|
|
|
unattached_dst = &rt_dst(rt);
|
|
|
|
|
|
|
|
|
|
/* Reset SKB */
|
|
|
|
|
nf_reset(skb);
|
|
|
|
|
secpath_reset(skb);
|
|
|
|
|
skb_dst_drop(skb);
|
2011-02-07 11:07:14 +09:00
|
|
|
skb_clear_rxhash(skb);
|
2010-08-27 13:55:02 -07:00
|
|
|
|
|
|
|
|
/* Offloading */
|
|
|
|
|
skb = handle_offloads(skb, mutable, rt);
|
2010-12-10 16:41:33 -08:00
|
|
|
if (IS_ERR(skb))
|
2010-08-27 13:55:02 -07:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
|
|
/* MTU */
|
|
|
|
|
if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
|
|
|
|
|
err = VPORT_E_TX_DROPPED;
|
|
|
|
|
goto error_free;
|
2010-08-10 20:11:48 -04:00
|
|
|
}
|
|
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
/*
|
|
|
|
|
* If we are over the MTU, allow the IP stack to handle fragmentation.
|
|
|
|
|
* Fragmentation is a slow path anyways.
|
|
|
|
|
*/
|
|
|
|
|
if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
|
|
|
|
|
cache)) {
|
|
|
|
|
unattached_dst = &rt_dst(rt);
|
|
|
|
|
dst_hold(unattached_dst);
|
|
|
|
|
cache = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* TTL */
|
2011-01-26 12:28:59 -08:00
|
|
|
ttl = mutable->ttl;
|
2010-08-27 13:55:02 -07:00
|
|
|
if (!ttl)
|
2011-03-18 14:37:33 -07:00
|
|
|
ttl = ip4_dst_hoplimit(&rt_dst(rt));
|
2010-08-27 13:55:02 -07:00
|
|
|
|
2011-01-26 12:28:59 -08:00
|
|
|
if (mutable->flags & TNL_F_TTL_INHERIT) {
|
2010-08-10 20:11:48 -04:00
|
|
|
if (skb->protocol == htons(ETH_P_IP))
|
2010-08-27 13:55:02 -07:00
|
|
|
ttl = ip_hdr(skb)->ttl;
|
2010-08-10 20:11:48 -04:00
|
|
|
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
|
|
|
|
|
else if (skb->protocol == htons(ETH_P_IPV6))
|
2010-08-27 13:55:02 -07:00
|
|
|
ttl = ipv6_hdr(skb)->hop_limit;
|
2010-08-10 20:11:48 -04:00
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
while (skb) {
|
|
|
|
|
struct iphdr *iph;
|
|
|
|
|
struct sk_buff *next_skb = skb->next;
|
|
|
|
|
skb->next = NULL;
|
2010-08-10 20:11:48 -04:00
|
|
|
|
2010-12-29 22:13:15 -08:00
|
|
|
if (unlikely(vlan_deaccel_tag(skb)))
|
|
|
|
|
goto next;
|
|
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
if (likely(cache)) {
|
|
|
|
|
skb_push(skb, cache->len);
|
|
|
|
|
memcpy(skb->data, get_cached_header(cache), cache->len);
|
|
|
|
|
skb_reset_mac_header(skb);
|
2011-10-21 14:16:04 -07:00
|
|
|
skb_set_network_header(skb, cache->hh_len);
|
2010-08-10 20:11:48 -04:00
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
} else {
|
|
|
|
|
skb_push(skb, mutable->tunnel_hlen);
|
|
|
|
|
create_tunnel_header(vport, mutable, rt, skb->data);
|
|
|
|
|
skb_reset_network_header(skb);
|
2010-08-10 20:11:48 -04:00
|
|
|
|
2010-08-27 13:55:02 -07:00
|
|
|
if (next_skb)
|
|
|
|
|
skb_dst_set(skb, dst_clone(unattached_dst));
|
|
|
|
|
else {
|
|
|
|
|
skb_dst_set(skb, unattached_dst);
|
|
|
|
|
unattached_dst = NULL;
|
|
|
|
|
}
|
2010-08-10 20:11:48 -04:00
|
|
|
}
		skb_set_transport_header(skb, skb_network_offset(skb) + sizeof(struct iphdr));

		iph = ip_hdr(skb);
		iph->tos = tos;
		iph->ttl = ttl;
		iph->frag_off = frag_off;
		ip_select_ident(iph, &rt_dst(rt), NULL);

		skb = tnl_vport->tnl_ops->update_header(vport, mutable, &rt_dst(rt), skb);
		if (unlikely(!skb))
			goto next;

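		/*
		 * Two transmit paths: with a cached header the packet
		 * bypasses the local IP stack and is either handed to an
		 * internal device or queued directly on the egress device;
		 * otherwise send_frags() pushes it through the IP stack,
		 * which may fragment it.
		 */
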
		if (likely(cache)) {
			int orig_len = skb->len - cache->len;
			struct vport *cache_vport = internal_dev_get_vport(rt_dst(rt).dev);

			skb->protocol = htons(ETH_P_IP);
			iph = ip_hdr(skb);
			iph->tot_len = htons(skb->len - skb_network_offset(skb));
			ip_send_check(iph);

			if (cache_vport) {
				if (unlikely(compute_ip_summed(skb, true))) {
					kfree_skb(skb);
					goto next;
				}

				OVS_CB(skb)->flow = cache->flow;
				vport_receive(cache_vport, skb);
				sent_len += orig_len;
			} else {
				int xmit_err;

				skb->dev = rt_dst(rt).dev;
				xmit_err = dev_queue_xmit(skb);

				if (likely(net_xmit_eval(xmit_err) == 0))
					sent_len += orig_len;
			}
		} else
			sent_len += send_frags(skb, mutable);

next:
		skb = next_skb;
	}

	if (unlikely(sent_len == 0))
		vport_record_error(vport, VPORT_E_TX_DROPPED);

	goto out;

error_free:
	tnl_free_linked_skbs(skb);
error:
	vport_record_error(vport, err);
out:
	dst_release(unattached_dst);
	return sent_len;
}

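/*
 * Netlink policy for the nested OVS_TUNNEL_ATTR_* options.  Addresses and
 * keys are carried in network byte order; the policy only checks their
 * width, the values are interpreted in tnl_set_config().
 */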
static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
	[OVS_TUNNEL_ATTR_FLAGS] = { .type = NLA_U32 },
	[OVS_TUNNEL_ATTR_DST_IPV4] = { .type = NLA_U32 },
	[OVS_TUNNEL_ATTR_SRC_IPV4] = { .type = NLA_U32 },
	[OVS_TUNNEL_ATTR_OUT_KEY] = { .type = NLA_U64 },
	[OVS_TUNNEL_ATTR_IN_KEY] = { .type = NLA_U64 },
	[OVS_TUNNEL_ATTR_TOS] = { .type = NLA_U8 },
	[OVS_TUNNEL_ATTR_TTL] = { .type = NLA_U8 },
};

/* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be zeroed. */
static int tnl_set_config(struct nlattr *options, const struct tnl_ops *tnl_ops,
			  const struct vport *cur_vport,
			  struct tnl_mutable_config *mutable)
{
	const struct vport *old_vport;
	const struct tnl_mutable_config *old_mutable;
	struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
	int err;

	if (!options)
		return -EINVAL;

	err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, tnl_policy);
	if (err)
		return err;

	if (!a[OVS_TUNNEL_ATTR_FLAGS] || !a[OVS_TUNNEL_ATTR_DST_IPV4])
		return -EINVAL;

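	/*
	 * Only the flag bits exported as TNL_F_PUBLIC may be set directly by
	 * userspace; the key-handling flags are derived below from which key
	 * attributes are present.
	 */
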
	mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_PUBLIC;

	mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]);
	if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) {
		if (ipv4_is_multicast(mutable->key.daddr))
			return -EINVAL;
		mutable->key.saddr = nla_get_be32(a[OVS_TUNNEL_ATTR_SRC_IPV4]);
	}

	if (a[OVS_TUNNEL_ATTR_TOS]) {
		mutable->tos = nla_get_u8(a[OVS_TUNNEL_ATTR_TOS]);
		if (mutable->tos != RT_TOS(mutable->tos))
			return -EINVAL;
	}

	if (a[OVS_TUNNEL_ATTR_TTL])
		mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);

	mutable->key.tunnel_type = tnl_ops->tunnel_type;
	if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
		mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
		mutable->flags |= TNL_F_IN_KEY_MATCH;
	} else {
		mutable->key.tunnel_type |= TNL_T_KEY_EXACT;
		mutable->key.in_key = nla_get_be64(a[OVS_TUNNEL_ATTR_IN_KEY]);
	}

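	/*
	 * A port configured without OVS_TUNNEL_ATTR_IN_KEY runs in key-match
	 * mode and accepts any incoming key; with the attribute, only packets
	 * carrying exactly that key are accepted on this port.
	 */
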
	if (!a[OVS_TUNNEL_ATTR_OUT_KEY])
		mutable->flags |= TNL_F_OUT_KEY_ACTION;
	else
		mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]);

	mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
	if (mutable->tunnel_hlen < 0)
		return mutable->tunnel_hlen;

	mutable->tunnel_hlen += sizeof(struct iphdr);

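	/*
	 * Refuse the configuration if a different vport is already bound to
	 * the same lookup key; finding the port currently being reconfigured
	 * is fine.
	 */
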
	old_vport = port_table_lookup(&mutable->key, &old_mutable);
	if (old_vport && old_vport != cur_vport)
		return -EEXIST;

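	/*
	 * For a multicast destination, find the egress device and join the
	 * group now so that encapsulated traffic is actually delivered to us.
	 * The ifindex is remembered in mlink so the membership can later be
	 * dropped when this configuration is freed.
	 */
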
	mutable->mlink = 0;
	if (ipv4_is_multicast(mutable->key.daddr)) {
		struct net_device *dev;
		struct rtable *rt;

		rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt_dst(rt).dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		mutable->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), mutable->key.daddr);
	}

	return 0;
}

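/*
 * Common constructor shared by the tunnel-based vport implementations: it
 * allocates the vport, seeds the fragment ID, parses the initial options
 * into a mutable config, and publishes the port in the tunnel port table.
 */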
struct vport *tnl_create(const struct vport_parms *parms,
			 const struct vport_ops *vport_ops,
			 const struct tnl_ops *tnl_ops)
{
	struct vport *vport;
	struct tnl_vport *tnl_vport;
	struct tnl_mutable_config *mutable;
	int initial_frag_id;
	int err;

	vport = vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		goto error;
	}

	tnl_vport = tnl_vport_priv(vport);

	strcpy(tnl_vport->name, parms->name);
	tnl_vport->tnl_ops = tnl_ops;

	mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable) {
		err = -ENOMEM;
		goto error_free_vport;
	}

	vport_gen_rand_ether_addr(mutable->eth_addr);

	get_random_bytes(&initial_frag_id, sizeof(int));
	atomic_set(&tnl_vport->frag_id, initial_frag_id);

	err = tnl_set_config(parms->options, tnl_ops, NULL, mutable);
	if (err)
		goto error_free_mutable;

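	/*
	 * The random offset below staggers header-cache expiration across
	 * ports so they do not all rebuild their cached headers at the same
	 * time.
	 */
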
	spin_lock_init(&tnl_vport->cache_lock);

#ifdef NEED_CACHE_TIMEOUT
	tnl_vport->cache_exp_interval = MAX_CACHE_EXP -
				       (net_random() % (MAX_CACHE_EXP / 2));
#endif

	rcu_assign_pointer(tnl_vport->mutable, mutable);

	port_table_add_port(vport);
	return vport;

error_free_mutable:
	free_mutable_rtnl(mutable);
	kfree(mutable);
error_free_vport:
	vport_free(vport);
error:
	return ERR_PTR(err);
}

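/*
 * Option changes are applied copy-on-write: a fresh mutable config is built
 * from the userspace options plus the fields that must be retained, then
 * published under RCU (moving the port to a new hash bucket if its lookup
 * key changed).
 */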
int tnl_set_options(struct vport *vport, struct nlattr *options)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	const struct tnl_mutable_config *old_mutable;
	struct tnl_mutable_config *mutable;
	int err;

	mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable) {
		err = -ENOMEM;
		goto error;
	}

	/* Copy fields whose values should be retained. */
	old_mutable = rtnl_dereference(tnl_vport->mutable);
	mutable->seq = old_mutable->seq + 1;
	memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);

	/* Parse the others configured by userspace. */
	err = tnl_set_config(options, tnl_vport->tnl_ops, vport, mutable);
	if (err)
		goto error_free;

	if (port_hash(&mutable->key) != port_hash(&old_mutable->key))
		port_table_move_port(vport, mutable);
	else
		assign_config_rcu(vport, mutable);

	return 0;

error_free:
	free_mutable_rtnl(mutable);
	kfree(mutable);
error:
	return err;
}

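/*
 * Dump the active configuration back to userspace using the same
 * OVS_TUNNEL_ATTR_* attributes accepted by tnl_set_config().  Keys are only
 * reported when an explicit key was configured, and optional fields only
 * when nonzero.
 */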
int tnl_get_options(const struct vport *vport, struct sk_buff *skb)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	const struct tnl_mutable_config *mutable = rcu_dereference_rtnl(tnl_vport->mutable);

	NLA_PUT_U32(skb, OVS_TUNNEL_ATTR_FLAGS, mutable->flags & TNL_F_PUBLIC);
	NLA_PUT_BE32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr);

	if (!(mutable->flags & TNL_F_IN_KEY_MATCH))
		NLA_PUT_BE64(skb, OVS_TUNNEL_ATTR_IN_KEY, mutable->key.in_key);
	if (!(mutable->flags & TNL_F_OUT_KEY_ACTION))
		NLA_PUT_BE64(skb, OVS_TUNNEL_ATTR_OUT_KEY, mutable->out_key);
	if (mutable->key.saddr)
		NLA_PUT_BE32(skb, OVS_TUNNEL_ATTR_SRC_IPV4, mutable->key.saddr);
	if (mutable->tos)
		NLA_PUT_U8(skb, OVS_TUNNEL_ATTR_TOS, mutable->tos);
	if (mutable->ttl)
		NLA_PUT_U8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

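/*
 * Teardown is deferred through RCU: tnl_destroy() unhooks the port from the
 * port table immediately, but the header cache and config are only freed by
 * free_port_rcu() once no readers can still reference them.
 */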
static void free_port_rcu(struct rcu_head *rcu)
{
	struct tnl_vport *tnl_vport = container_of(rcu,
						   struct tnl_vport, rcu);

	free_cache((struct tnl_cache __force *)tnl_vport->cache);
	kfree((struct tnl_mutable_config __force *)tnl_vport->mutable);
	vport_free(tnl_vport_to_vport(tnl_vport));
}

void tnl_destroy(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *mutable;

	mutable = rtnl_dereference(tnl_vport->mutable);
	port_table_remove_port(vport);
	free_mutable_rtnl(mutable);
	call_rcu(&tnl_vport->rcu, free_port_rcu);
}

int tnl_set_addr(struct vport *vport, const unsigned char *addr)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *old_mutable, *mutable;

	old_mutable = rtnl_dereference(tnl_vport->mutable);
	mutable = kmemdup(old_mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable)
		return -ENOMEM;

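	/*
	 * The duplicated config inherits the multicast ifindex, so clear it
	 * in the old copy to keep the group membership alive when that copy
	 * is released.
	 */
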
	old_mutable->mlink = 0;

	memcpy(mutable->eth_addr, addr, ETH_ALEN);
	assign_config_rcu(vport, mutable);

	return 0;
}

const char *tnl_get_name(const struct vport *vport)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	return tnl_vport->name;
}

const unsigned char *tnl_get_addr(const struct vport *vport)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
}

void tnl_free_linked_skbs(struct sk_buff *skb)
{
	while (skb) {
		struct sk_buff *next = skb->next;
		kfree_skb(skb);
		skb = next;
	}
}

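/*
 * port_table is a fixed-size hash table of tunnel vports.  tnl_exit() walks
 * it only to assert that every port was destroyed before the table itself is
 * freed on module unload.
 */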
int tnl_init(void)
{
	int i;

	port_table = kmalloc(PORT_TABLE_SIZE * sizeof(struct hlist_head *),
			     GFP_KERNEL);
	if (!port_table)
		return -ENOMEM;

	for (i = 0; i < PORT_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&port_table[i]);

	return 0;
}

void tnl_exit(void)
{
	int i;

	for (i = 0; i < PORT_TABLE_SIZE; i++) {
		struct tnl_vport *tnl_vport;
		struct hlist_head *hash_head;
		struct hlist_node *n;

		hash_head = &port_table[i];
		hlist_for_each_entry(tnl_vport, n, hash_head, hash_node) {
			BUG();
			goto out;
		}
	}
out:
	kfree(port_table);
}