2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-25 15:07:05 +00:00

Tunnel: Cleanup old tunnel infrastructure.

Now that the userspace flow-based tunneling code has been checked in, the
kernel port-based tunneling code can be removed.

This patch makes the following changes:
 - Removes the tunnel port hash table and moves the tunnel port list to
   the individual vports.
 - Cleans up the per-tunnel-port configuration.
 - Removes the OVS_KEY_ATTR_TUN_ID action.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

Bug #15078
This commit is contained in:
Pravin B Shelar
2013-03-04 13:00:25 -08:00
parent 0d0673857b
commit 85c9de194b
22 changed files with 361 additions and 1369 deletions

View File

@@ -38,8 +38,7 @@
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len,
struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb);
const struct nlattr *attr, int len, bool keep_skb);
static int make_writable(struct sk_buff *skb, int write_len)
{
@@ -399,8 +398,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
}
static int sample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr,
struct ovs_key_ipv4_tunnel *tun_key)
const struct nlattr *attr)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
@@ -421,12 +419,11 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
return do_execute_actions(dp, skb, nla_data(acts_list),
nla_len(acts_list), tun_key, true);
nla_len(acts_list), true);
}
static int execute_set_action(struct sk_buff *skb,
const struct nlattr *nested_attr,
struct ovs_key_ipv4_tunnel *tun_key)
const struct nlattr *nested_attr)
{
int err = 0;
@@ -439,22 +436,6 @@ static int execute_set_action(struct sk_buff *skb,
skb_set_mark(skb, nla_get_u32(nested_attr));
break;
case OVS_KEY_ATTR_TUN_ID:
/* If we're only using the TUN_ID action, store the value in a
* temporary instance of struct ovs_key_ipv4_tunnel on the stack.
* If both IPV4_TUNNEL and TUN_ID are being used together we
* can't write into the IPV4_TUNNEL action, so make a copy and
* write into that version.
*/
if (!OVS_CB(skb)->tun_key)
memset(tun_key, 0, sizeof(*tun_key));
else if (OVS_CB(skb)->tun_key != tun_key)
memcpy(tun_key, OVS_CB(skb)->tun_key, sizeof(*tun_key));
OVS_CB(skb)->tun_key = tun_key;
OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr);
break;
case OVS_KEY_ATTR_IPV4_TUNNEL:
OVS_CB(skb)->tun_key = nla_data(nested_attr);
break;
@@ -485,8 +466,7 @@ static int execute_set_action(struct sk_buff *skb,
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len,
struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb)
const struct nlattr *attr, int len, bool keep_skb)
{
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
@@ -525,11 +505,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, nla_data(a), tun_key);
err = execute_set_action(skb, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a, tun_key);
err = sample(dp, skb, a);
break;
}
@@ -576,7 +556,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
struct loop_counter *loop;
int error;
struct ovs_key_ipv4_tunnel tun_key;
/* Check whether we've looped too much. */
loop = &__get_cpu_var(loop_counters);
@@ -590,7 +569,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
OVS_CB(skb)->tun_key = NULL;
error = do_execute_actions(dp, skb, acts->actions,
acts->actions_len, &tun_key, false);
acts->actions_len, false);
/* Check whether sub-actions looped too much. */
if (unlikely(loop->looping))

View File

@@ -604,7 +604,6 @@ static int validate_set(const struct nlattr *a,
int err;
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_TUN_ID:
case OVS_KEY_ATTR_ETHERNET:
break;
@@ -2316,13 +2315,9 @@ static int __init dp_init(void)
if (err)
goto error_genl_exec;
err = ovs_tnl_init();
if (err)
goto error_wq;
err = ovs_flow_init();
if (err)
goto error_tnl_exit;
goto error_wq;
err = ovs_vport_init();
if (err)
@@ -2352,8 +2347,6 @@ error_vport_exit:
ovs_vport_exit();
error_flow_exit:
ovs_flow_exit();
error_tnl_exit:
ovs_tnl_exit();
error_wq:
ovs_workqueues_exit();
error_genl_exec:
@@ -2371,7 +2364,6 @@ static void dp_cleanup(void)
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
ovs_tnl_exit();
ovs_workqueues_exit();
genl_exec_exit();
}

View File

@@ -141,6 +141,7 @@ struct dp_upcall_info {
*/
struct ovs_net {
struct list_head dps;
struct vport_net vport_net;
};
extern int ovs_net_id;

View File

@@ -851,9 +851,6 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
[OVS_KEY_ATTR_TUNNEL] = -1,
/* Not upstream. */
[OVS_KEY_ATTR_TUN_ID] = sizeof(__be64),
};
static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
@@ -1137,25 +1134,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
}
if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) &&
attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
__be64 tun_id;
err = ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
if (err)
return err;
if (!(swkey->tun_key.tun_flags & OVS_TNL_F_KEY))
return -EINVAL;
tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
if (tun_id != swkey->tun_key.tun_id)
return -EINVAL;
attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
} else if (attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
if (attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
err = ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
if (err)
return err;
@@ -1305,7 +1284,6 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru
struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
const struct nlattr *nla;
int rem;
__be64 tun_id = 0;
flow->key.phy.in_port = DP_MAX_PORTS;
flow->key.phy.priority = 0;
@@ -1326,38 +1304,10 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const stru
flow->key.phy.priority = nla_get_u32(nla);
break;
case OVS_KEY_ATTR_TUN_ID:
tun_id = nla_get_be64(nla);
if (tun_key->ipv4_dst) {
if (!(tun_key->tun_flags & OVS_TNL_F_KEY))
return -EINVAL;
if (tun_key->tun_id != tun_id)
return -EINVAL;
break;
}
tun_key->tun_id = tun_id;
tun_key->tun_flags |= OVS_TNL_F_KEY;
break;
case OVS_KEY_ATTR_TUNNEL:
if (tun_key->tun_flags & OVS_TNL_F_KEY) {
tun_id = tun_key->tun_id;
err = ipv4_tun_from_nlattr(nla, tun_key);
if (err)
return err;
if (!(tun_key->tun_flags & OVS_TNL_F_KEY))
return -EINVAL;
if (tun_key->tun_id != tun_id)
return -EINVAL;
} else {
err = ipv4_tun_from_nlattr(nla, tun_key);
if (err)
return err;
}
break;
case OVS_KEY_ATTR_IN_PORT:
@@ -1398,10 +1348,6 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
ipv4_tun_to_nlattr(skb, &swkey->tun_key))
goto nla_put_failure;
if ((swkey->tun_key.tun_flags & OVS_TNL_F_KEY) &&
nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun_key.tun_id))
goto nla_put_failure;
if (swkey->phy.in_port != DP_MAX_PORTS &&
nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
goto nla_put_failure;

View File

@@ -164,7 +164,6 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* struct pad nl hdr total
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_TUN_ID 8 -- 4 12
* OVS_KEY_ATTR_TUNNEL 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_ID 8 -- 4 12
* - OVS_TUNNEL_KEY_ATTR_IPV4_SRC 4 -- 4 8
@@ -184,9 +183,9 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* ----------------------------------------------------------
* total 220
* total 208
*/
#define FLOW_BUFSIZE 220
#define FLOW_BUFSIZE 208
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,

View File

@@ -18,11 +18,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/if_vlan.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/in_route.h>
#include <linux/inetdevice.h>
@@ -32,15 +27,6 @@
#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
#include <net/dsfield.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
#endif
#include <net/route.h>
#include <net/xfrm.h>
@@ -50,24 +36,6 @@
#include "tunnel.h"
#include "vlan.h"
#include "vport.h"
#include "vport-internal_dev.h"
#define PORT_TABLE_SIZE 1024
static struct hlist_head *port_table __read_mostly;
/*
* These are just used as an optimization: they don't require any kind of
* synchronization because we could have just as easily read the value before
* the port change happened.
*/
static unsigned int key_local_remote_ports __read_mostly;
static unsigned int key_remote_ports __read_mostly;
static unsigned int key_multicast_ports __read_mostly;
static unsigned int local_remote_ports __read_mostly;
static unsigned int remote_ports __read_mostly;
static unsigned int null_ports __read_mostly;
static unsigned int multicast_ports __read_mostly;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
@@ -75,247 +43,6 @@ static unsigned int multicast_ports __read_mostly;
#define rt_dst(rt) (rt->u.dst)
#endif
/* Maps a tunnel vport's private area back to the generic vport that owns it. */
static struct vport *tnl_vport_to_vport(const struct tnl_vport *tnl_vport)
{
	struct vport *owner = vport_from_priv(tnl_vport);

	return owner;
}
/* RCU callback: frees the tnl_mutable_config in which 'rcu' is embedded. */
static void free_config_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct tnl_mutable_config, rcu));
}
/* Releases the part of 'mutable' that needs RTNL and therefore cannot be
 * torn down from an RCU callback.  This part does not have to wait for an
 * RCU grace period.
 */
static void free_mutable_rtnl(struct tnl_mutable_config *mutable)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	/* Nothing to undo unless the destination is multicast and a link
	 * index was recorded in 'mlink'.
	 */
	if (!ipv4_is_multicast(mutable->key.daddr) || !mutable->mlink)
		return;

	in_dev = inetdev_by_index(port_key_get_net(&mutable->key),
				  mutable->mlink);
	if (!in_dev)
		return;

	ip_mc_dec_group(in_dev, mutable->key.daddr);
}
/* Publishes 'new_config' as the tunnel's configuration and retires the one
 * it replaces: the RTNL-only state of the old config is released right away
 * and the structure itself is freed through call_rcu().
 */
static void assign_config_rcu(struct vport *vport,
			      struct tnl_mutable_config *new_config)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	struct tnl_mutable_config *prev = rtnl_dereference(priv->mutable);

	rcu_assign_pointer(priv->mutable, new_config);

	free_mutable_rtnl(prev);
	call_rcu(&prev->rcu, free_config_rcu);
}
/* Selects the file-scope port counter that corresponds to 'mutable', based
 * on whether TNL_F_IN_KEY_MATCH is set and on which of the source address,
 * multicast destination and unicast destination are configured.
 */
static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
{
	const bool mcast = ipv4_is_multicast(mutable->key.daddr);
	const bool key_match = !!(mutable->flags & TNL_F_IN_KEY_MATCH);

	/* A configured source address takes precedence over everything. */
	if (mutable->key.saddr)
		return key_match ? &local_remote_ports
				 : &key_local_remote_ports;

	if (mcast)
		return key_match ? &multicast_ports : &key_multicast_ports;

	if (key_match)
		return &remote_ports;

	/* Exact-key port without a destination address is a null port. */
	return mutable->key.daddr ? &key_remote_ports : &null_ports;
}
static u32 port_hash(const struct port_lookup_key *key)
{
return jhash2((u32 *)key, (PORT_KEY_LEN / sizeof(u32)), 0);
}
/* Returns the port_table bucket selected by the low bits of 'hash'. */
static struct hlist_head *find_bucket(u32 hash)
{
	u32 idx = hash & (PORT_TABLE_SIZE - 1);

	return port_table + idx;
}
/* Inserts 'vport' into the port table under the hash of its current key and
 * bumps the counter of the pool its configuration belongs to.
 */
static void port_table_add_port(struct vport *vport)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	const struct tnl_mutable_config *cfg = rtnl_dereference(priv->mutable);
	struct hlist_head *bucket = find_bucket(port_hash(&cfg->key));
	unsigned int *pool;

	hlist_add_head_rcu(&priv->hash_node, bucket);

	pool = find_port_pool(rtnl_dereference(priv->mutable));
	++*pool;
}
/* Re-hashes 'vport' under the key of 'new_mutable', installs 'new_mutable'
 * as its configuration and moves its count from the old pool to the new one.
 */
static void port_table_move_port(struct vport *vport,
				 struct tnl_mutable_config *new_mutable)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	struct hlist_head *dst = find_bucket(port_hash(&new_mutable->key));
	unsigned int *pool;

	/* Relink the node into the bucket for the new key. */
	hlist_del_init_rcu(&priv->hash_node);
	hlist_add_head_rcu(&priv->hash_node, dst);

	/* Leave the old pool, swap the config, then join the new pool. */
	pool = find_port_pool(rtnl_dereference(priv->mutable));
	--*pool;

	assign_config_rcu(vport, new_mutable);

	pool = find_port_pool(rtnl_dereference(priv->mutable));
	++*pool;
}
/* Unlinks 'vport' from the port table and decrements its pool counter. */
static void port_table_remove_port(struct vport *vport)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	unsigned int *pool;

	hlist_del_init_rcu(&priv->hash_node);

	pool = find_port_pool(rtnl_dereference(priv->mutable));
	--*pool;
}
/* Searches the bucket selected by 'key' for a tunnel port whose configured
 * key matches 'key' over PORT_KEY_LEN bytes.  On a match, stores the port's
 * configuration in '*pmutable' and returns its vport; otherwise returns NULL
 * and leaves '*pmutable' untouched.
 */
static struct vport *port_table_lookup(struct port_lookup_key *key,
				       const struct tnl_mutable_config **pmutable)
{
	struct hlist_head *head = find_bucket(port_hash(key));
	struct tnl_vport *candidate;
	struct hlist_node *pos;

	hlist_for_each_entry_rcu(candidate, pos, head, hash_node) {
		struct tnl_mutable_config *cfg;

		cfg = rcu_dereference_rtnl(candidate->mutable);
		if (memcmp(&cfg->key, key, PORT_KEY_LEN) != 0)
			continue;

		*pmutable = cfg;
		return tnl_vport_to_vport(candidate);
	}

	return NULL;
}
/*
 * Finds the tunnel vport that matches the given addresses, key and tunnel
 * type in namespace 'net'.
 *
 * The port table is probed from the most specific key to the least specific
 * one, and the first hit wins:
 *   1. exact in_key, saddr + daddr   (skipped for multicast)
 *   2. exact in_key, wildcard saddr
 *   3. wildcard in_key, saddr + daddr (skipped for multicast)
 *   4. wildcard in_key, wildcard saddr
 *   5. multicast probes (exact in_key, then wildcard in_key)
 *   6. null port (all of saddr, daddr and in_key zero)
 * Each probe is attempted only while the corresponding port counter is
 * non-zero.
 *
 * On success the port's configuration is stored in '*mutable' and the vport
 * is returned; NULL is returned if nothing matches.
 *
 * NOTE(review): multicast is tested on 'saddr', and the multicast probes
 * search with daddr set to 'saddr' -- presumably callers pass the received
 * packet's destination address as 'saddr'; confirm against the callers.
 */
struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
__be64 key, int tunnel_type,
const struct tnl_mutable_config **mutable)
{
struct port_lookup_key lookup;
struct vport *vport;
bool is_multicast = ipv4_is_multicast(saddr);
port_key_set_net(&lookup, net);
lookup.saddr = saddr;
lookup.daddr = daddr;
/* First try for exact match on in_key. */
lookup.in_key = key;
lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
if (!is_multicast && key_local_remote_ports) {
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
if (key_remote_ports) {
lookup.saddr = 0;
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
/* Restore saddr for the wildcard-key probes that follow. */
lookup.saddr = saddr;
}
/* Then try matches that wildcard in_key. */
lookup.in_key = 0;
lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
if (!is_multicast && local_remote_ports) {
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
if (remote_ports) {
lookup.saddr = 0;
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
/* Multicast probes: wildcard saddr, and 'saddr' used as the daddr key. */
if (is_multicast) {
lookup.saddr = 0;
lookup.daddr = saddr;
if (key_multicast_ports) {
lookup.tunnel_type = tunnel_type | TNL_T_KEY_EXACT;
lookup.in_key = key;
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
if (multicast_ports) {
lookup.tunnel_type = tunnel_type | TNL_T_KEY_MATCH;
lookup.in_key = 0;
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
}
/* Last resort: a null port, keyed only by namespace and tunnel type. */
if (null_ports) {
lookup.daddr = 0;
lookup.saddr = 0;
lookup.in_key = 0;
lookup.tunnel_type = tunnel_type;
vport = port_table_lookup(&lookup, mutable);
if (vport)
return vport;
}
return NULL;
}
/* If the tunnel key's ipv4_tos carries the ECN "Congestion Experienced"
 * mark, copies that mark onto the inner IPv4 or IPv6 header of 'skb',
 * looking through a single 802.1Q tag if one is present.  Does nothing if
 * the needed headers cannot be pulled.
 */
static void ecn_decapsulate(struct sk_buff *skb)
{
	__be16 protocol;

	if (likely(!INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos)))
		return;

	protocol = skb->protocol;
	skb_set_network_header(skb, ETH_HLEN);

	if (protocol == htons(ETH_P_8021Q)) {
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			return;

		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, VLAN_ETH_HLEN);
	}

	if (protocol == htons(ETH_P_IP)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
						 + sizeof(struct iphdr))))
			return;

		IP_ECN_set_ce(ip_hdr(skb));
		return;
	}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (protocol == htons(ETH_P_IPV6)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
						 + sizeof(struct ipv6hdr))))
			return;

		IP6_ECN_set_ce(ipv6_hdr(skb));
	}
#endif
}
/**
* ovs_tnl_rcv - ingress point for generic tunnel code
*
@@ -347,8 +74,6 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
nf_reset(skb);
skb_clear_rxhash(skb);
secpath_reset(skb);
ecn_decapsulate(skb);
vlan_set_tci(skb, 0);
if (unlikely(compute_ip_summed(skb, false))) {
@@ -417,7 +142,6 @@ static bool need_linearize(const struct sk_buff *skb)
}
static struct sk_buff *handle_offloads(struct sk_buff *skb,
const struct tnl_mutable_config *mutable,
const struct rtable *rt,
int tunnel_hlen)
{
@@ -479,39 +203,6 @@ error:
return ERR_PTR(err);
}
/* Transmits every skb of the list headed by 'skb' with ip_local_out().
 *
 * Returns the sum of (skb->len - tunnel_hlen) over the skbs that were sent
 * before the first transmit failure.  After a failure the rest of the list
 * is freed rather than sent.
 */
static int send_frags(struct sk_buff *skb,
		      int tunnel_hlen)
{
	int total = 0;

	while (skb) {
		struct sk_buff *rest = skb->next;
		int payload_len = skb->len - tunnel_hlen;
		int rc;

		skb->next = NULL;
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

		rc = ip_local_out(skb);
		skb = rest;

		if (unlikely(net_xmit_eval(rc))) {
			/*
			 * There's no point in continuing to send fragments
			 * once one has been dropped so just free the rest.
			 * This may help improve the congestion that caused
			 * the first packet to be dropped.
			 */
			ovs_tnl_free_linked_skbs(skb);
			break;
		}

		total += payload_len;
	}

	return total;
}
/* Compute source UDP port for outgoing packet.
* Currently we use the flow hash.
*/
@@ -530,143 +221,53 @@ u16 ovs_tnl_get_src_port(struct sk_buff *skb)
int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
enum vport_err_type err = VPORT_E_TX_ERROR;
struct rtable *rt;
struct ovs_key_ipv4_tunnel tun_key;
__be32 saddr;
int sent_len = 0;
int tunnel_hlen;
__be16 frag_off;
__be32 daddr;
__be32 saddr;
u32 skb_mark;
u8 ttl;
u8 tos;
/* Validate the protocol headers before we try to use them. */
if (skb->protocol == htons(ETH_P_8021Q) &&
!vlan_tx_tag_present(skb)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
if (unlikely(!OVS_CB(skb)->tun_key))
goto error_free;
skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
skb_set_network_header(skb, VLAN_ETH_HLEN);
}
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+ sizeof(struct iphdr))))
skb->protocol = 0;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+ sizeof(struct ipv6hdr))))
skb->protocol = 0;
}
#endif
/* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here,
* and zero it out.
*/
if (!OVS_CB(skb)->tun_key) {
memset(&tun_key, 0, sizeof(tun_key));
OVS_CB(skb)->tun_key = &tun_key;
}
tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, OVS_CB(skb)->tun_key);
if (unlikely(tunnel_hlen < 0)) {
err = VPORT_E_TX_DROPPED;
goto error_free;
}
tunnel_hlen += sizeof(struct iphdr);
if (OVS_CB(skb)->tun_key->ipv4_dst) {
daddr = OVS_CB(skb)->tun_key->ipv4_dst;
saddr = OVS_CB(skb)->tun_key->ipv4_src;
tos = OVS_CB(skb)->tun_key->ipv4_tos;
ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
frag_off = OVS_CB(skb)->tun_key->tun_flags &
OVS_TNL_F_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
u8 inner_tos;
daddr = mutable->key.daddr;
saddr = mutable->key.saddr;
if (unlikely(!daddr)) {
/* Trying to sent packet from Null-port without
* tunnel info? Drop this packet. */
err = VPORT_E_TX_DROPPED;
goto error_free;
}
/* ToS */
if (skb->protocol == htons(ETH_P_IP))
inner_tos = ip_hdr(skb)->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
else
inner_tos = 0;
if (mutable->flags & TNL_F_TOS_INHERIT)
tos = inner_tos;
else
tos = mutable->tos;
tos = INET_ECN_encapsulate(tos, inner_tos);
/* TTL */
ttl = mutable->ttl;
if (mutable->flags & TNL_F_TTL_INHERIT) {
if (skb->protocol == htons(ETH_P_IP))
ttl = ip_hdr(skb)->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6))
ttl = ipv6_hdr(skb)->hop_limit;
#endif
}
frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
}
/* Route lookup */
skb_mark = skb_get_mark(skb);
rt = find_route(port_key_get_net(&mutable->key), &saddr, daddr,
tnl_vport->tnl_ops->ipproto, tos, skb_mark);
saddr = OVS_CB(skb)->tun_key->ipv4_src;
rt = find_route(ovs_dp_get_net(vport->dp),
&saddr,
OVS_CB(skb)->tun_key->ipv4_dst,
tnl_vport->tnl_ops->ipproto,
OVS_CB(skb)->tun_key->ipv4_tos,
skb_get_mark(skb));
if (IS_ERR(rt))
goto error_free;
/* Offloading */
tunnel_hlen = tnl_vport->tnl_ops->hdr_len(OVS_CB(skb)->tun_key);
tunnel_hlen += sizeof(struct iphdr);
skb = handle_offloads(skb, rt, tunnel_hlen);
if (IS_ERR(skb)) {
skb = NULL;
goto err_free_rt;
}
/* Reset SKB */
nf_reset(skb);
secpath_reset(skb);
skb_dst_drop(skb);
skb_clear_rxhash(skb);
/* Offloading */
skb = handle_offloads(skb, mutable, rt, tunnel_hlen);
if (IS_ERR(skb)) {
skb = NULL;
goto err_free_rt;
}
/* TTL Fixup. */
if (!OVS_CB(skb)->tun_key->ipv4_dst) {
if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
if (!ttl)
ttl = ip4_dst_hoplimit(&rt_dst(rt));
}
}
while (skb) {
struct iphdr *iph;
struct sk_buff *next_skb = skb->next;
struct iphdr *iph;
int frag_len;
skb->next = NULL;
if (unlikely(vlan_deaccel_tag(skb)))
goto next;
frag_len = skb->len;
skb_push(skb, tunnel_hlen);
skb_reset_network_header(skb);
skb_set_transport_header(skb, sizeof(struct iphdr));
@@ -676,25 +277,29 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
else
skb_dst_set(skb, &rt_dst(rt));
/* Push Tunnel header. */
tnl_vport->tnl_ops->build_header(vport, skb, tunnel_hlen);
/* Push IP header. */
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->protocol = tnl_vport->tnl_ops->ipproto;
iph->daddr = daddr;
iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst;
iph->saddr = saddr;
iph->tos = tos;
iph->ttl = ttl;
iph->frag_off = frag_off;
iph->tos = OVS_CB(skb)->tun_key->ipv4_tos;
iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
iph->frag_off = OVS_CB(skb)->tun_key->tun_flags &
OVS_TNL_F_DONT_FRAGMENT ? htons(IP_DF) : 0;
ip_select_ident(iph, &rt_dst(rt), NULL);
/* Push Tunnel header. */
skb = tnl_vport->tnl_ops->build_header(vport, mutable,
&rt_dst(rt), skb, tunnel_hlen);
if (unlikely(!skb))
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
err = ip_local_out(skb);
if (unlikely(net_xmit_eval(err)))
goto next;
sent_len += send_frags(skb, tunnel_hlen);
sent_len += frag_len;
next:
skb = next_skb;
@@ -708,122 +313,17 @@ next:
err_free_rt:
ip_rt_put(rt);
error_free:
ovs_tnl_free_linked_skbs(skb);
kfree_skb(skb);
ovs_vport_record_error(vport, err);
return sent_len;
}
/* Netlink attribute policy for the nested OVS_TUNNEL_ATTR_* options; passed
 * to nla_parse_nested() by tnl_set_config() to type-check each attribute.
 */
static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
[OVS_TUNNEL_ATTR_FLAGS] = { .type = NLA_U32 },
[OVS_TUNNEL_ATTR_DST_IPV4] = { .type = NLA_U32 },
[OVS_TUNNEL_ATTR_SRC_IPV4] = { .type = NLA_U32 },
[OVS_TUNNEL_ATTR_OUT_KEY] = { .type = NLA_U64 },
[OVS_TUNNEL_ATTR_IN_KEY] = { .type = NLA_U64 },
[OVS_TUNNEL_ATTR_TOS] = { .type = NLA_U8 },
[OVS_TUNNEL_ATTR_TTL] = { .type = NLA_U8 },
[OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
};
/* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
 * zeroed.
 *
 * 'net' and 'tnl_ops->tunnel_type' are always recorded in the key.  When
 * 'options' is NULL, or no destination address is given (a null port), only
 * the destination port (if present) is taken from the attributes.
 *
 * For a multicast destination the outgoing device is looked up, its ifindex
 * is stored in 'mutable->mlink' and the multicast group is joined on it with
 * ip_mc_inc_group(); free_mutable_rtnl() undoes that join.
 *
 * Returns 0 on success, the error from nla_parse_nested(), -EINVAL for a
 * source address combined with a multicast destination or a ToS with ECN
 * bits set, -EADDRNOTAVAIL if no usable route/device exists for a multicast
 * destination, or -EEXIST if a port other than 'cur_vport' already uses the
 * resulting key.
 */
static int tnl_set_config(struct net *net, struct nlattr *options,
const struct tnl_ops *tnl_ops,
const struct vport *cur_vport,
struct tnl_mutable_config *mutable)
{
const struct vport *old_vport;
const struct tnl_mutable_config *old_mutable;
struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
int err;
port_key_set_net(&mutable->key, net);
mutable->key.tunnel_type = tnl_ops->tunnel_type;
if (!options)
goto out;
err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, tnl_policy);
if (err)
return err;
/* Process attributes possibly useful for null_ports first */
if (a[OVS_TUNNEL_ATTR_DST_PORT])
mutable->dst_port =
htons(nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]));
if (a[OVS_TUNNEL_ATTR_DST_IPV4])
mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]);
/* Skip the rest if configuring a null_port */
if (!mutable->key.daddr)
goto out;
/* Only the publicly defined flag bits are accepted from userspace. */
if (a[OVS_TUNNEL_ATTR_FLAGS])
mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS])
& TNL_F_PUBLIC;
if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) {
if (ipv4_is_multicast(mutable->key.daddr))
return -EINVAL;
mutable->key.saddr = nla_get_be32(a[OVS_TUNNEL_ATTR_SRC_IPV4]);
}
if (a[OVS_TUNNEL_ATTR_TOS]) {
mutable->tos = nla_get_u8(a[OVS_TUNNEL_ATTR_TOS]);
/* Reject ToS config with ECN bits set. */
if (mutable->tos & INET_ECN_MASK)
return -EINVAL;
}
if (a[OVS_TUNNEL_ATTR_TTL])
mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);
/* No in_key attribute: wildcard the key on lookup; otherwise match it
 * exactly.
 */
if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
mutable->flags |= TNL_F_IN_KEY_MATCH;
} else {
mutable->key.tunnel_type |= TNL_T_KEY_EXACT;
mutable->key.in_key = nla_get_be64(a[OVS_TUNNEL_ATTR_IN_KEY]);
}
/* No out_key attribute: flag that the output key is supplied elsewhere. */
if (!a[OVS_TUNNEL_ATTR_OUT_KEY])
mutable->flags |= TNL_F_OUT_KEY_ACTION;
else
mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]);
mutable->mlink = 0;
if (ipv4_is_multicast(mutable->key.daddr)) {
struct net_device *dev;
struct rtable *rt;
__be32 saddr = mutable->key.saddr;
rt = find_route(port_key_get_net(&mutable->key),
&saddr, mutable->key.daddr,
tnl_ops->ipproto, mutable->tos, 0);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
/* Only the output device is needed; drop the route reference. */
dev = rt_dst(rt).dev;
ip_rt_put(rt);
if (__in_dev_get_rtnl(dev) == NULL)
return -EADDRNOTAVAIL;
mutable->mlink = dev->ifindex;
ip_mc_inc_group(__in_dev_get_rtnl(dev), mutable->key.daddr);
}
out:
/* Refuse a key that is already in use by a different port. */
old_vport = port_table_lookup(&mutable->key, &old_mutable);
if (old_vport && old_vport != cur_vport)
return -EEXIST;
return 0;
}
struct vport *ovs_tnl_create(const struct vport_parms *parms,
const struct vport_ops *vport_ops,
const struct tnl_ops *tnl_ops)
{
struct vport *vport;
struct tnl_vport *tnl_vport;
struct tnl_mutable_config *mutable;
int err;
vport = ovs_vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
@@ -837,123 +337,24 @@ struct vport *ovs_tnl_create(const struct vport_parms *parms,
strcpy(tnl_vport->name, parms->name);
tnl_vport->tnl_ops = tnl_ops;
mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
if (!mutable) {
err = -ENOMEM;
goto error_free_vport;
}
err = tnl_set_config(ovs_dp_get_net(parms->dp), parms->options, tnl_ops,
NULL, mutable);
if (err)
goto error_free_mutable;
rcu_assign_pointer(tnl_vport->mutable, mutable);
port_table_add_port(vport);
return vport;
error_free_mutable:
free_mutable_rtnl(mutable);
kfree(mutable);
error_free_vport:
ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
/* Replaces the configuration of tunnel 'vport' with one built from the
 * netlink attributes in 'options'.
 *
 * Returns 0 on success, -EINVAL if the current configuration has no
 * destination address, -ENOMEM if the new configuration cannot be
 * allocated, or the error reported by tnl_set_config().
 */
int ovs_tnl_set_options(struct vport *vport, struct nlattr *options)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	const struct tnl_mutable_config *cur = rtnl_dereference(priv->mutable);
	struct tnl_mutable_config *next;
	int err;

	if (!cur->key.daddr)
		return -EINVAL;

	next = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!next)
		return -ENOMEM;

	/* Parse the options supplied by userspace into the new config. */
	err = tnl_set_config(ovs_dp_get_net(vport->dp), options,
			     priv->tnl_ops, vport, next);
	if (err) {
		free_mutable_rtnl(next);
		kfree(next);
		return err;
	}

	/* Re-hash the port only when the new key hashes differently. */
	if (port_hash(&next->key) == port_hash(&cur->key))
		assign_config_rcu(vport, next);
	else
		port_table_move_port(vport, next);

	return 0;
}
int ovs_tnl_get_options(const struct vport *vport, struct sk_buff *skb)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference_rtnl(tnl_vport->mutable);
if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
ntohs(mutable->dst_port)))
goto nla_put_failure;
/* Skip the rest for null_ports */
if (!mutable->key.daddr)
return 0;
if (nla_put_be32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr))
goto nla_put_failure;
if (nla_put_u32(skb, OVS_TUNNEL_ATTR_FLAGS,
mutable->flags & TNL_F_PUBLIC))
goto nla_put_failure;
if (!(mutable->flags & TNL_F_IN_KEY_MATCH) &&
nla_put_be64(skb, OVS_TUNNEL_ATTR_IN_KEY, mutable->key.in_key))
goto nla_put_failure;
if (!(mutable->flags & TNL_F_OUT_KEY_ACTION) &&
nla_put_be64(skb, OVS_TUNNEL_ATTR_OUT_KEY, mutable->out_key))
goto nla_put_failure;
if (mutable->key.saddr &&
nla_put_be32(skb, OVS_TUNNEL_ATTR_SRC_IPV4, mutable->key.saddr))
goto nla_put_failure;
if (mutable->tos && nla_put_u8(skb, OVS_TUNNEL_ATTR_TOS, mutable->tos))
goto nla_put_failure;
if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
goto nla_put_failure;
return 0;
nla_put_failure:
return -EMSGSIZE;
}
static void free_port_rcu(struct rcu_head *rcu)
{
struct tnl_vport *tnl_vport = container_of(rcu,
struct tnl_vport, rcu);
kfree((struct tnl_mutable __force *)tnl_vport->mutable);
ovs_vport_free(tnl_vport_to_vport(tnl_vport));
ovs_vport_free(vport_from_priv(tnl_vport));
}
/* Tears down tunnel 'vport': removes it from the port table, releases the
 * RTNL-only state of its configuration and defers freeing the port itself
 * to an RCU callback.
 */
void ovs_tnl_destroy(struct vport *vport)
{
	struct tnl_vport *priv = tnl_vport_priv(vport);
	struct tnl_mutable_config *cfg = rtnl_dereference(priv->mutable);

	port_table_remove_port(vport);
	free_mutable_rtnl(cfg);

	call_rcu(&priv->rcu, free_port_rcu);
}
@@ -962,32 +363,3 @@ const char *ovs_tnl_get_name(const struct vport *vport)
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return tnl_vport->name;
}
/* Frees every skb on the ->next-linked list starting at 'skb'.  A NULL
 * 'skb' is a no-op.
 */
void ovs_tnl_free_linked_skbs(struct sk_buff *skb)
{
	struct sk_buff *following;

	for (; skb; skb = following) {
		/* Read the link before the skb is released. */
		following = skb->next;
		kfree_skb(skb);
	}
}
/* Allocates the tunnel port hash table and initializes every bucket to an
 * empty list.
 *
 * Returns 0 on success or -ENOMEM if the table cannot be allocated.  The
 * table is released by ovs_tnl_exit().
 */
int ovs_tnl_init(void)
{
	int i;

	/* Size the table by its element type (struct hlist_head), not by a
	 * pointer to it, so the allocation stays correct if the two sizes
	 * ever differ.
	 */
	port_table = kmalloc(PORT_TABLE_SIZE * sizeof(*port_table),
			     GFP_KERNEL);
	if (!port_table)
		return -ENOMEM;

	for (i = 0; i < PORT_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&port_table[i]);

	return 0;
}
/* Frees the port hash table allocated by ovs_tnl_init(). */
void ovs_tnl_exit(void)
{
kfree(port_table);
}

View File

@@ -24,153 +24,42 @@
#include <net/netns/generic.h>
#include "flow.h"
#include "openvswitch/tunnel.h"
#include "vport.h"
/*
* The absolute minimum fragment size. Note that there are many other
* definitions of the minimum MTU.
*/
#define IP_MIN_MTU 68
/*
* One of these goes in struct tnl_ops and in tnl_find_port().
* These values are in the same namespace as other TNL_T_* values, so
* only the least significant 10 bits are available to define protocol
* identifiers.
*/
#define TNL_T_PROTO_GRE 0
#define TNL_T_PROTO_GRE64 1
#define TNL_T_PROTO_VXLAN 3
#define TNL_T_PROTO_LISP 4
/* These flags are only needed when calling tnl_find_port(). */
#define TNL_T_KEY_EXACT (1 << 10)
#define TNL_T_KEY_MATCH (1 << 11)
/* Private flags not exposed to userspace in this form. */
#define TNL_F_IN_KEY_MATCH (1 << 16) /* Store the key in tun_id to
* match in flow table. */
#define TNL_F_OUT_KEY_ACTION (1 << 17) /* Get the key from a SET_TUNNEL
* action. */
/* All public tunnel flags. */
#define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \
TNL_F_DF_DEFAULT)
/**
 * struct port_lookup_key - Tunnel port key, used as hash table key.
 * @in_key: Key to match on input, 0 for wildcard.
 * @net: Network namespace of the port.
 * @saddr: IPv4 source address to match, 0 to accept any source address.
 * @daddr: IPv4 destination of tunnel.
 * @tunnel_type: Set of TNL_T_* flags that define lookup.
 *
 * The key is hashed and compared as raw bytes up to and including
 * @tunnel_type (see PORT_KEY_LEN), so member order and layout matter.
 */
struct port_lookup_key {
__be64 in_key;
#ifdef CONFIG_NET_NS
struct net *net;
#endif
__be32 saddr;
__be32 daddr;
u32 tunnel_type;
};
#define PORT_KEY_LEN (offsetof(struct port_lookup_key, tunnel_type) + \
FIELD_SIZEOF(struct port_lookup_key, tunnel_type))
/* Returns the network namespace recorded in 'key', read via read_pnet(). */
static inline struct net *port_key_get_net(const struct port_lookup_key *key)
{
return read_pnet(&key->net);
}
/* Records network namespace 'net' in 'key', stored via write_pnet(). */
static inline void port_key_set_net(struct port_lookup_key *key, struct net *net)
{
write_pnet(&key->net, net);
}
/**
 * struct tnl_mutable_config - modifiable configuration for a tunnel.
 * @key: Used as key for tunnel port.  Configured via OVS_TUNNEL_ATTR_*
 * attributes.
 * @rcu: RCU callback head for deferred destruction.
 * @out_key: Key to use on output, 0 if this tunnel has no fixed output key.
 * @flags: TNL_F_* flags.
 * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS.
 * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL.
 * @dst_port: Destination port from OVS_TUNNEL_ATTR_DST_PORT in network byte
 * order, 0 if not configured.
 * @mlink: ifindex of the device on which the multicast group for the tunnel
 * destination was joined, 0 if none.
 */
struct tnl_mutable_config {
struct port_lookup_key key;
struct rcu_head rcu;
/* Configured via OVS_TUNNEL_ATTR_* attributes. */
__be64 out_key;
u32 flags;
u8 tos;
u8 ttl;
__be16 dst_port;
/* Multicast configuration. */
int mlink;
};
struct tnl_ops {
u32 tunnel_type; /* Put the TNL_T_PROTO_* type in here. */
u8 ipproto; /* The IP protocol for the tunnel. */
/*
* Returns the length of the tunnel header that will be added in
* build_header() (i.e. excludes the IP header). Returns a negative
* error code if the configuration is invalid.
* build_header() (i.e. excludes the IP header).
*/
int (*hdr_len)(const struct tnl_mutable_config *,
const struct ovs_key_ipv4_tunnel *);
int (*hdr_len)(const struct ovs_key_ipv4_tunnel *);
/*
* Returns a linked list of SKBs with tunnel headers (multiple
* packets may be generated in the event of fragmentation). Space
* will have already been allocated at the start of the packet equal
* to sizeof(struct iphdr) + value returned by hdr_len(). The IP
* header will have already been constructed.
* Builds header for given SKB. Space will have already been
* allocated at the start of the packet equal
* to sizeof(struct iphdr) + value returned by hdr_len().
*/
struct sk_buff *(*build_header)(const struct vport *,
const struct tnl_mutable_config *,
struct dst_entry *, struct sk_buff *,
void (*build_header)(const struct vport *, struct sk_buff *,
int tunnel_hlen);
};
struct tnl_vport {
struct rcu_head rcu;
struct hlist_node hash_node;
__be16 dst_port;
char name[IFNAMSIZ];
const struct tnl_ops *tnl_ops;
struct tnl_mutable_config __rcu *mutable;
};
struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
const struct tnl_ops *);
void ovs_tnl_destroy(struct vport *);
int ovs_tnl_set_options(struct vport *, struct nlattr *);
int ovs_tnl_get_options(const struct vport *, struct sk_buff *);
const char *ovs_tnl_get_name(const struct vport *vport);
int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb);
u16 ovs_tnl_get_src_port(struct sk_buff *skb);
struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
__be64 key, int tunnel_type,
const struct tnl_mutable_config **mutable);
bool ovs_tnl_frag_needed(struct vport *vport,
const struct tnl_mutable_config *mutable,
struct sk_buff *skb, unsigned int mtu);
void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
int ovs_tnl_init(void);
void ovs_tnl_exit(void);
static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
return vport_priv(vport);
@@ -191,25 +80,4 @@ static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
}
/*
 * Selects the effective TNL_F_* flags and output key for a packet.
 *
 * When the flow-supplied tunnel key carries an IPv4 destination, the result
 * is derived purely from @tun_key: OVS_TNL_F_KEY maps to
 * TNL_F_OUT_KEY_ACTION, OVS_TNL_F_CSUM maps to TNL_F_CSUM, and the output key
 * is the flow's tun_id.  Otherwise the port configuration in @mutable is
 * used, with the flow's tun_id overriding the configured out_key only when
 * TNL_F_OUT_KEY_ACTION is set on the port.
 *
 * Results are returned through @flags and @out_key.
 */
static inline void tnl_get_param(const struct tnl_mutable_config *mutable,
				 const struct ovs_key_ipv4_tunnel *tun_key,
				 u32 *flags, __be64 *out_key)
{
	u32 result;

	if (!tun_key->ipv4_dst) {
		/* No destination in the flow key: fall back to port config. */
		result = mutable->flags;
		*flags = result;
		*out_key = (result & TNL_F_OUT_KEY_ACTION) ?
			   tun_key->tun_id : mutable->out_key;
		return;
	}

	/* Flow-based tunnel: translate the per-flow tunnel flags. */
	result = (tun_key->tun_flags & OVS_TNL_F_KEY) ? TNL_F_OUT_KEY_ACTION : 0;
	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
		result |= TNL_F_CSUM;

	*flags = result;
	*out_key = tun_key->tun_id;
}
#endif /* tunnel.h */

View File

@@ -44,31 +44,29 @@ struct gre_base_hdr {
__be16 protocol;
};
static int gre_hdr_len(const struct tnl_mutable_config *mutable,
const struct ovs_key_ipv4_tunnel *tun_key)
static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
{
int len;
u32 flags;
__be64 out_key;
int len = GRE_HEADER_SECTION;
tnl_get_param(mutable, tun_key, &flags, &out_key);
len = GRE_HEADER_SECTION;
if (flags & TNL_F_CSUM)
if (tun_key->tun_flags & OVS_TNL_F_KEY)
len += GRE_HEADER_SECTION;
/* Set key for GRE64 tunnels, even if the key is zero. */
if (out_key ||
mutable->key.tunnel_type & TNL_T_PROTO_GRE64 ||
flags & TNL_F_OUT_KEY_ACTION) {
if (tun_key->tun_flags & OVS_TNL_F_CSUM)
len += GRE_HEADER_SECTION;
if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
len += GRE_HEADER_SECTION;
}
return len;
}
/*
 * Returns the GRE64 tunnel header length for @tun_key.
 *
 * A GRE64 header always carries three sections: the base GRE header, the
 * key and the sequence number (the key is sent even if it is zero).  One
 * more section is added when the flow requests a checksum.
 */
static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
{
	const int fixed_sections = 3;	/* GRE hdr + GRE key + GRE seq */
	int len = fixed_sections * GRE_HEADER_SECTION;

	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
		len += GRE_HEADER_SECTION;

	return len;
}
/* Returns the least-significant 32 bits of a __be64. */
static __be32 be64_get_low32(__be64 x)
@@ -89,39 +87,33 @@ static __be32 be64_get_high32(__be64 x)
#endif
}
static struct sk_buff *gre_build_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
struct dst_entry *dst,
struct sk_buff *skb,
int tunnel_hlen)
static void __gre_build_header(struct sk_buff *skb,
int tunnel_hlen,
bool is_gre64)
{
u32 flags;
__be64 out_key;
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
__be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
- GRE_HEADER_SECTION);
struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
tnl_get_param(mutable, tun_key, &flags, &out_key);
struct dst_entry *dst = skb_dst(skb);
greh->protocol = htons(ETH_P_TEB);
greh->flags = 0;
/* Work backwards over the options so the checksum is last. */
if (out_key || flags & TNL_F_OUT_KEY_ACTION ||
mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
greh->flags |= GRE_KEY;
if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
if (is_gre64) {
/* Set higher 32 bits to seq. */
*options = be64_get_high32(out_key);
*options = be64_get_high32(tun_key->tun_id);
options--;
greh->flags |= GRE_SEQ;
}
*options = be64_get_low32(out_key);
*options = be64_get_low32(tun_key->tun_id);
options--;
}
if (flags & TNL_F_CSUM) {
if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
greh->flags |= GRE_CSUM;
*options = 0;
*(__sum16 *)options = csum_fold(skb_checksum(skb,
@@ -137,8 +129,20 @@ static struct sk_buff *gre_build_header(const struct vport *vport,
*/
skb->local_df = 1;
__ip_select_ident(ip_hdr(skb), dst, 0);
}
return skb;
/*
* build_header callback for plain GRE vports (see gre_tnl_ops).
* Forwards to __gre_build_header() with is_gre64 = false; @vport is unused.
*/
static void gre_build_header(const struct vport *vport,
struct sk_buff *skb,
int tunnel_hlen)
{
__gre_build_header(skb, tunnel_hlen, false);
}
/*
* build_header callback for GRE64 vports (see gre64_tnl_ops).
* Forwards to __gre_build_header() with is_gre64 = true; @vport is unused.
*/
static void gre64_build_header(const struct vport *vport,
struct sk_buff *skb,
int tunnel_hlen)
{
__gre_build_header(skb, tunnel_hlen, true);
}
static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
@@ -151,7 +155,7 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
}
static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
u32 *tunnel_type)
bool *is_gre64)
{
/* IP and ICMP protocol handlers check that the IHL is valid. */
struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
@@ -183,16 +187,16 @@ static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
if (greh->flags & GRE_SEQ) {
seq = *options;
*tunnel_type = TNL_T_PROTO_GRE64;
*is_gre64 = true;
} else {
seq = 0;
*tunnel_type = TNL_T_PROTO_GRE;
*is_gre64 = false;
}
*tun_id = key_to_tunnel_id(gre_key, seq);
} else {
*tun_id = 0;
/* Ignore GRE seq if there is no key present. */
*tunnel_type = TNL_T_PROTO_GRE;
*is_gre64 = false;
}
if (greh->flags & GRE_SEQ)
@@ -227,18 +231,12 @@ static bool check_checksum(struct sk_buff *skb)
return (csum == 0);
}
static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
__be16 gre_flags, __be64 *key)
static u32 gre_flags_to_tunnel_flags(__be16 gre_flags, bool is_gre64)
{
u32 tunnel_flags = 0;
if (gre_flags & GRE_KEY) {
if (mutable->flags & TNL_F_IN_KEY_MATCH ||
!mutable->key.daddr)
if (gre_flags & GRE_KEY || is_gre64)
tunnel_flags = OVS_TNL_F_KEY;
else
*key = 0;
}
if (gre_flags & GRE_CSUM)
tunnel_flags |= OVS_TNL_F_CSUM;
@@ -249,35 +247,38 @@ static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
/* Called with rcu_read_lock and BH disabled. */
static int gre_rcv(struct sk_buff *skb)
{
struct ovs_net *ovs_net;
struct vport *vport;
const struct tnl_mutable_config *mutable;
int hdr_len;
struct iphdr *iph;
struct ovs_key_ipv4_tunnel tun_key;
__be16 gre_flags;
u32 tnl_flags;
__be64 key;
u32 tunnel_type;
bool is_gre64;
if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)))
goto error;
if (unlikely(!check_checksum(skb)))
goto error;
hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &tunnel_type);
hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &is_gre64);
if (unlikely(hdr_len < 0))
goto error;
ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
if (is_gre64)
vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
else
vport = rcu_dereference(ovs_net->vport_net.gre_vport);
if (unlikely(!vport))
goto error;
if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
goto error;
iph = ip_hdr(skb);
vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, key,
tunnel_type, &mutable);
if (unlikely(!vport))
goto error;
tnl_flags = gre_flags_to_tunnel_flags(mutable, gre_flags, &key);
tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64);
tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
OVS_CB(skb)->tun_key = &tun_key;
@@ -292,30 +293,6 @@ error:
return 0;
}
static const struct tnl_ops gre_tnl_ops = {
.tunnel_type = TNL_T_PROTO_GRE,
.ipproto = IPPROTO_GRE,
.hdr_len = gre_hdr_len,
.build_header = gre_build_header,
};
static struct vport *gre_create(const struct vport_parms *parms)
{
return ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
}
static const struct tnl_ops gre64_tnl_ops = {
.tunnel_type = TNL_T_PROTO_GRE64,
.ipproto = IPPROTO_GRE,
.hdr_len = gre_hdr_len,
.build_header = gre_build_header,
};
static struct vport *gre_create64(const struct vport_parms *parms)
{
return ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
}
static const struct net_protocol gre_protocol_handlers = {
.handler = gre_rcv,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
@@ -350,28 +327,93 @@ static void gre_exit(void)
inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
}
/* GRE vport. */
/* Tunnel callbacks for plain GRE: IP protocol GRE, 32-bit key variant. */
static const struct tnl_ops gre_tnl_ops = {
.ipproto = IPPROTO_GRE,
.hdr_len = gre_hdr_len,
.build_header = gre_build_header,
};
/*
 * Creates the GRE vport for the network namespace of @parms->dp.
 *
 * Only one GRE vport may be registered per namespace; a second attempt
 * returns ERR_PTR(-EEXIST).  On success the new vport is published through
 * ovs_net->vport_net.gre_vport so that gre_rcv() can find it.
 *
 * Returns the new vport, or an ERR_PTR() on failure.
 */
static struct vport *gre_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct ovs_net *ovs_net;
	struct vport *vport;

	ovs_net = net_generic(net, ovs_net_id);
	if (rtnl_dereference(ovs_net->vport_net.gre_vport))
		return ERR_PTR(-EEXIST);

	vport = ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
	/*
	 * Never publish an error pointer: the receive path only checks the
	 * slot for NULL, and a stale ERR_PTR would also make every later
	 * create attempt fail with -EEXIST.
	 */
	if (IS_ERR(vport))
		return vport;

	rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
	return vport;
}
/*
 * Tears down the GRE vport: clears the per-namespace gre_vport slot first,
 * then hands the vport to ovs_tnl_destroy().
 */
static void gre_tnl_destroy(struct vport *vport)
{
	struct ovs_net *ovs_net = net_generic(ovs_dp_get_net(vport->dp),
					      ovs_net_id);

	rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
	ovs_tnl_destroy(vport);
}
const struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.flags = VPORT_F_TUN_ID,
.init = gre_init,
.exit = gre_exit,
.create = gre_create,
.destroy = ovs_tnl_destroy,
.destroy = gre_tnl_destroy,
.get_name = ovs_tnl_get_name,
.get_options = ovs_tnl_get_options,
.set_options = ovs_tnl_set_options,
.send = ovs_tnl_send,
};
/* GRE64 vport. */
/* Tunnel callbacks for GRE64: IP protocol GRE, using the GRE64 helpers. */
static const struct tnl_ops gre64_tnl_ops = {
.ipproto = IPPROTO_GRE,
.hdr_len = gre64_hdr_len,
.build_header = gre64_build_header,
};
/*
 * Creates the GRE64 vport for the network namespace of @parms->dp.
 *
 * Only one GRE64 vport may be registered per namespace; a second attempt
 * returns ERR_PTR(-EEXIST).  On success the new vport is published through
 * ovs_net->vport_net.gre64_vport so that gre_rcv() can find it.
 *
 * Returns the new vport, or an ERR_PTR() on failure.
 */
static struct vport *gre64_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct ovs_net *ovs_net;
	struct vport *vport;

	ovs_net = net_generic(net, ovs_net_id);
	if (rtnl_dereference(ovs_net->vport_net.gre64_vport))
		return ERR_PTR(-EEXIST);

	vport = ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
	/*
	 * Never publish an error pointer: the receive path only checks the
	 * slot for NULL, and a stale ERR_PTR would also make every later
	 * create attempt fail with -EEXIST.
	 */
	if (IS_ERR(vport))
		return vport;

	rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport);
	return vport;
}
/*
 * Tears down the GRE64 vport: clears the per-namespace gre64_vport slot
 * first, then hands the vport to ovs_tnl_destroy().
 */
static void gre64_tnl_destroy(struct vport *vport)
{
	struct ovs_net *ovs_net = net_generic(ovs_dp_get_net(vport->dp),
					      ovs_net_id);

	rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL);
	ovs_tnl_destroy(vport);
}
const struct vport_ops ovs_gre64_vport_ops = {
.type = OVS_VPORT_TYPE_GRE64,
.flags = VPORT_F_TUN_ID,
.init = gre_init,
.exit = gre_exit,
.create = gre_create64,
.destroy = ovs_tnl_destroy,
.create = gre64_create,
.destroy = gre64_tnl_destroy,
.get_name = ovs_tnl_get_name,
.get_options = ovs_tnl_get_options,
.set_options = ovs_tnl_set_options,
.send = ovs_tnl_send,
};

View File

@@ -24,8 +24,8 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <net/icmp.h>
@@ -94,8 +94,7 @@ struct lisphdr {
#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
static inline int lisp_hdr_len(const struct tnl_mutable_config *mutable,
const struct ovs_key_ipv4_tunnel *tun_key)
static inline int lisp_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
{
return LISP_HLEN;
}
@@ -103,25 +102,26 @@ static inline int lisp_hdr_len(const struct tnl_mutable_config *mutable,
/**
* struct lisp_port - Keeps track of open UDP ports
* @list: list element.
* @port: The UDP port number in network byte order.
* @vport: vport for the tunnel.
* @socket: The socket created for this port number.
* @count: How many ports are using this socket/port.
*/
struct lisp_port {
struct list_head list;
__be16 port;
struct vport *vport;
struct socket *lisp_rcv_socket;
int count;
struct rcu_head rcu;
};
static LIST_HEAD(lisp_ports);
static struct lisp_port *lisp_port_exists(struct net *net, __be16 port)
static struct lisp_port *lisp_find_port(struct net *net, __be16 port)
{
struct lisp_port *lisp_port;
list_for_each_entry(lisp_port, &lisp_ports, list) {
if (lisp_port->port == port &&
list_for_each_entry_rcu(lisp_port, &lisp_ports, list) {
struct tnl_vport *tnl_vport = tnl_vport_priv(lisp_port->vport);
if (tnl_vport->dst_port == port &&
net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net))
return lisp_port;
}
@@ -180,21 +180,16 @@ static __be64 instance_id_to_tunnel_id(__u8 *iid)
#endif
}
static struct sk_buff *lisp_build_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
struct dst_entry *dst,
static void lisp_build_header(const struct vport *vport,
struct sk_buff *skb,
int tunnel_hlen)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct udphdr *udph = udp_hdr(skb);
struct lisphdr *lisph = (struct lisphdr *)(udph + 1);
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
__be64 out_key;
u32 flags;
tnl_get_param(mutable, tun_key, &flags, &out_key);
udph->dest = mutable->dst_port;
udph->dest = tnl_vport->dst_port;
udph->source = htons(ovs_tnl_get_src_port(skb));
udph->check = 0;
udph->len = htons(skb->len - skb_transport_offset(skb));
@@ -210,7 +205,7 @@ static struct sk_buff *lisp_build_header(const struct vport *vport,
lisph->u1.nonce[1] = 0;
lisph->u1.nonce[2] = 0;
tunnel_id_to_instance_id(out_key, &lisph->u2.word2.instance_id[0]);
tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
lisph->u2.word2.locator_status_bits = 1;
/*
@@ -220,24 +215,24 @@ static struct sk_buff *lisp_build_header(const struct vport *vport,
* packet originally had DF set.
*/
skb->local_df = 1;
__ip_select_ident(ip_hdr(skb), dst, 0);
return skb;
__ip_select_ident(ip_hdr(skb), skb_dst(skb), 0);
}
/* Called with rcu_read_lock and BH disabled. */
static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
{
struct vport *vport;
struct lisp_port *lisp_port;
struct lisphdr *lisph;
const struct tnl_mutable_config *mutable;
struct iphdr *iph, *inner_iph;
struct ovs_key_ipv4_tunnel tun_key;
__be64 key;
u32 tunnel_flags = 0;
struct ethhdr *ethh;
__be16 protocol;
lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
if (unlikely(!lisp_port))
goto error;
if (unlikely(!pskb_may_pull(skb, LISP_HLEN)))
goto error;
@@ -250,19 +245,9 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
else
key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
iph = ip_hdr(skb);
vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
key, TNL_T_PROTO_LISP, &mutable);
if (unlikely(!vport))
goto error;
if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
tunnel_flags = OVS_TNL_F_KEY;
else
key = 0;
/* Save outer tunnel values */
tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
iph = ip_hdr(skb);
tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
OVS_CB(skb)->tun_key = &tun_key;
/* Drop non-IP inner packets */
@@ -285,7 +270,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
ethh->h_source[0] = 0x02;
ethh->h_proto = protocol;
ovs_tnl_rcv(vport, skb);
ovs_tnl_rcv(lisp_port->vport, skb);
goto out;
error:
@@ -300,6 +285,7 @@ static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net)
{
int err;
struct sockaddr_in sin;
struct tnl_vport *tnl_vport = tnl_vport_priv(lisp_port->vport);
err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
&lisp_port->lisp_rcv_socket);
@@ -311,7 +297,7 @@ static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net)
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = lisp_port->port;
sin.sin_port = tnl_vport->dst_port;
err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
@@ -332,27 +318,33 @@ error:
return err;
}
static void lisp_tunnel_release(struct lisp_port *lisp_port)
{
lisp_port->count--;
if (lisp_port->count == 0) {
/* Release old socket */
sk_release_kernel(lisp_port->lisp_rcv_socket->sk);
list_del(&lisp_port->list);
/* RCU callback: frees the struct lisp_port that embeds @rcu. */
static void free_port_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct lisp_port, rcu));
}
/*
 * Releases a LISP port entry; a NULL @lisp_port is a no-op.
 *
 * The entry is unlinked from the port list, its receive socket is released,
 * and the structure itself is freed through call_rcu().
 */
static void lisp_tunnel_release(struct lisp_port *lisp_port)
{
	if (lisp_port) {
		list_del_rcu(&lisp_port->list);
		/* Release socket */
		sk_release_kernel(lisp_port->lisp_rcv_socket->sk);
		call_rcu(&lisp_port->rcu, free_port_rcu);
	}
}
static int lisp_tunnel_setup(struct net *net, struct nlattr *options,
struct lisp_port **lport)
static int lisp_tunnel_setup(struct net *net, struct vport *vport,
struct nlattr *options)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct lisp_port *lisp_port;
struct nlattr *a;
int err;
u16 dst_port;
struct lisp_port *lisp_port = NULL;
*lport = NULL;
if (!options) {
err = -EINVAL;
@@ -369,11 +361,9 @@ static int lisp_tunnel_setup(struct net *net, struct nlattr *options,
}
/* Verify if we already have a socket created for this port */
lisp_port = lisp_port_exists(net, htons(dst_port));
lisp_port = lisp_find_port(net, htons(dst_port));
if (lisp_port) {
lisp_port->count++;
err = 0;
*lport = lisp_port;
err = -EEXIST;
goto out;
}
@@ -384,55 +374,33 @@ static int lisp_tunnel_setup(struct net *net, struct nlattr *options,
goto out;
}
lisp_port->port = htons(dst_port);
lisp_port->count = 1;
list_add_tail(&lisp_port->list, &lisp_ports);
tnl_vport->dst_port = htons(dst_port);
lisp_port->vport = vport;
list_add_tail_rcu(&lisp_port->list, &lisp_ports);
err = lisp_socket_init(lisp_port, net);
if (err)
goto error;
*lport = lisp_port;
goto out;
return 0;
error:
list_del(&lisp_port->list);
list_del_rcu(&lisp_port->list);
kfree(lisp_port);
out:
return err;
}
static int lisp_tnl_set_options(struct vport *vport, struct nlattr *options)
static int lisp_get_options(const struct vport *vport, struct sk_buff *skb)
{
int err;
struct net *net = ovs_dp_get_net(vport->dp);
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *config;
struct lisp_port *old_port = NULL;
struct lisp_port *lisp_port = NULL;
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
config = rtnl_dereference(tnl_vport->mutable);
old_port = lisp_port_exists(net, config->dst_port);
err = lisp_tunnel_setup(net, options, &lisp_port);
if (err)
goto out;
err = ovs_tnl_set_options(vport, options);
if (err)
lisp_tunnel_release(lisp_port);
else {
/* Release old socket */
lisp_tunnel_release(old_port);
}
out:
return err;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(tnl_vport->dst_port)))
return -EMSGSIZE;
return 0;
}
static const struct tnl_ops ovs_lisp_tnl_ops = {
.tunnel_type = TNL_T_PROTO_LISP,
.ipproto = IPPROTO_UDP,
.hdr_len = lisp_hdr_len,
.build_header = lisp_build_header,
@@ -442,33 +410,29 @@ static void lisp_tnl_destroy(struct vport *vport)
{
struct lisp_port *lisp_port;
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *config;
config = rtnl_dereference(tnl_vport->mutable);
lisp_port = lisp_port_exists(ovs_dp_get_net(vport->dp),
config->dst_port);
lisp_port = lisp_find_port(ovs_dp_get_net(vport->dp),
tnl_vport->dst_port);
lisp_tunnel_release(lisp_port);
ovs_tnl_destroy(vport);
}
static struct vport *lisp_tnl_create(const struct vport_parms *parms)
{
int err;
struct vport *vport;
struct lisp_port *lisp_port = NULL;
err = lisp_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options,
&lisp_port);
if (err)
return ERR_PTR(err);
int err;
vport = ovs_tnl_create(parms, &ovs_lisp_vport_ops, &ovs_lisp_tnl_ops);
if (IS_ERR(vport))
lisp_tunnel_release(lisp_port);
return vport;
err = lisp_tunnel_setup(ovs_dp_get_net(parms->dp), vport,
parms->options);
if (err) {
ovs_tnl_destroy(vport);
return ERR_PTR(err);
}
return vport;
}
@@ -479,8 +443,7 @@ const struct vport_ops ovs_lisp_vport_ops = {
.create = lisp_tnl_create,
.destroy = lisp_tnl_destroy,
.get_name = ovs_tnl_get_name,
.get_options = ovs_tnl_get_options,
.set_options = lisp_tnl_set_options,
.get_options = lisp_get_options,
.send = lisp_tnl_send,
};
#else

View File

@@ -24,8 +24,8 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <net/icmp.h>
@@ -50,8 +50,7 @@ struct vxlanhdr {
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
const struct ovs_key_ipv4_tunnel *tun_key)
static inline int vxlan_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
{
return VXLAN_HLEN;
}
@@ -59,25 +58,26 @@ static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
/**
* struct vxlan_port - Keeps track of open UDP ports
* @list: list element.
* @port: The UDP port number in network byte order.
* @vport: vport for the tunnel.
* @socket: The socket created for this port number.
* @count: How many ports are using this socket/port.
*/
struct vxlan_port {
struct list_head list;
__be16 port;
struct vport *vport;
struct socket *vxlan_rcv_socket;
int count;
struct rcu_head rcu;
};
static LIST_HEAD(vxlan_ports);
static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port)
static struct vxlan_port *vxlan_find_port(struct net *net, __be16 port)
{
struct vxlan_port *vxlan_port;
list_for_each_entry(vxlan_port, &vxlan_ports, list) {
if (vxlan_port->port == port &&
list_for_each_entry_rcu(vxlan_port, &vxlan_ports, list) {
struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
if (tnl_vport->dst_port == port &&
net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
return vxlan_port;
}
@@ -90,27 +90,22 @@ static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
return (struct vxlanhdr *)(udp_hdr(skb) + 1);
}
static struct sk_buff *vxlan_build_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
struct dst_entry *dst,
static void vxlan_build_header(const struct vport *vport,
struct sk_buff *skb,
int tunnel_hlen)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct udphdr *udph = udp_hdr(skb);
struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
__be64 out_key;
u32 flags;
tnl_get_param(mutable, tun_key, &flags, &out_key);
udph->dest = mutable->dst_port;
udph->dest = tnl_vport->dst_port;
udph->source = htons(ovs_tnl_get_src_port(skb));
udph->check = 0;
udph->len = htons(skb->len - skb_transport_offset(skb));
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
vxh->vx_vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
/*
* Allow our local IP stack to fragment the outer packet even if the
@@ -119,21 +114,21 @@ static struct sk_buff *vxlan_build_header(const struct vport *vport,
* packet originally had DF set.
*/
skb->local_df = 1;
__ip_select_ident(ip_hdr(skb), dst, 0);
return skb;
__ip_select_ident(ip_hdr(skb), skb_dst(skb), 0);
}
/* Called with rcu_read_lock and BH disabled. */
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
struct vport *vport;
struct vxlan_port *vxlan_vport;
struct vxlanhdr *vxh;
const struct tnl_mutable_config *mutable;
struct iphdr *iph;
struct ovs_key_ipv4_tunnel tun_key;
__be64 key;
u32 tunnel_flags = 0;
vxlan_vport = vxlan_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
if (unlikely(!vxlan_vport))
goto error;
if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
goto error;
@@ -148,22 +143,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
iph = ip_hdr(skb);
vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
key, TNL_T_PROTO_VXLAN, &mutable);
if (unlikely(!vport))
goto error;
if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
tunnel_flags = OVS_TNL_F_KEY;
else
key = 0;
/* Save outer tunnel values */
tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
iph = ip_hdr(skb);
tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
OVS_CB(skb)->tun_key = &tun_key;
ovs_tnl_rcv(vport, skb);
ovs_tnl_rcv(vxlan_vport->vport, skb);
goto out;
error:
@@ -178,6 +163,7 @@ static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
{
int err;
struct sockaddr_in sin;
struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
&vxlan_port->vxlan_rcv_socket);
@@ -189,7 +175,7 @@ static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = vxlan_port->port;
sin.sin_port = tnl_vport->dst_port;
err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
@@ -210,26 +196,33 @@ error:
return err;
}
static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
static void free_port_rcu(struct rcu_head *rcu)
{
vxlan_port->count--;
struct vxlan_port *vxlan_port = container_of(rcu,
struct vxlan_port, rcu);
if (vxlan_port->count == 0) {
/* Release old socket */
sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
list_del(&vxlan_port->list);
kfree(vxlan_port);
}
}
static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
struct vxlan_port **vxport)
/*
 * Releases a VXLAN port entry; a NULL @vxlan_port is a no-op.
 *
 * The entry is unlinked from the port list, its receive socket is released,
 * and the structure itself is freed through call_rcu().
 */
static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
{
	if (vxlan_port) {
		list_del_rcu(&vxlan_port->list);
		/* Release socket */
		sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
		call_rcu(&vxlan_port->rcu, free_port_rcu);
	}
}
static int vxlan_tunnel_setup(struct net *net, struct vport *vport,
struct nlattr *options)
{
struct vxlan_port *vxlan_port;
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct nlattr *a;
int err;
u16 dst_port;
struct vxlan_port *vxlan_port = NULL;
*vxport = NULL;
if (!options) {
err = -EINVAL;
@@ -246,11 +239,9 @@ static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
}
/* Verify if we already have a socket created for this port */
vxlan_port = vxlan_port_exists(net, htons(dst_port));
vxlan_port = vxlan_find_port(net, htons(dst_port));
if (vxlan_port) {
vxlan_port->count++;
err = 0;
*vxport = vxlan_port;
err = -EEXIST;
goto out;
}
@@ -261,55 +252,33 @@ static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
goto out;
}
vxlan_port->port = htons(dst_port);
vxlan_port->count = 1;
list_add_tail(&vxlan_port->list, &vxlan_ports);
tnl_vport->dst_port = htons(dst_port);
vxlan_port->vport = vport;
list_add_tail_rcu(&vxlan_port->list, &vxlan_ports);
err = vxlan_socket_init(vxlan_port, net);
if (err)
goto error;
*vxport = vxlan_port;
goto out;
return 0;
error:
list_del(&vxlan_port->list);
list_del_rcu(&vxlan_port->list);
kfree(vxlan_port);
out:
return err;
}
static int vxlan_set_options(struct vport *vport, struct nlattr *options)
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
int err;
struct net *net = ovs_dp_get_net(vport->dp);
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *config;
struct vxlan_port *old_port = NULL;
struct vxlan_port *vxlan_port = NULL;
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
config = rtnl_dereference(tnl_vport->mutable);
old_port = vxlan_port_exists(net, config->dst_port);
err = vxlan_tunnel_setup(net, options, &vxlan_port);
if (err)
goto out;
err = ovs_tnl_set_options(vport, options);
if (err)
vxlan_tunnel_release(vxlan_port);
else {
/* Release old socket */
vxlan_tunnel_release(old_port);
}
out:
return err;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(tnl_vport->dst_port)))
return -EMSGSIZE;
return 0;
}
static const struct tnl_ops ovs_vxlan_tnl_ops = {
.tunnel_type = TNL_T_PROTO_VXLAN,
.ipproto = IPPROTO_UDP,
.hdr_len = vxlan_hdr_len,
.build_header = vxlan_build_header,
@@ -319,15 +288,11 @@ static void vxlan_tnl_destroy(struct vport *vport)
{
struct vxlan_port *vxlan_port;
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *config;
config = rtnl_dereference(tnl_vport->mutable);
vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
config->dst_port);
vxlan_port = vxlan_find_port(ovs_dp_get_net(vport->dp),
tnl_vport->dst_port);
vxlan_tunnel_release(vxlan_port);
ovs_tnl_destroy(vport);
}
@@ -335,17 +300,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
int err;
struct vport *vport;
struct vxlan_port *vxlan_port = NULL;
err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options,
&vxlan_port);
if (err)
return ERR_PTR(err);
vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
if (IS_ERR(vport))
vxlan_tunnel_release(vxlan_port);
return vport;
err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), vport,
parms->options);
if (err) {
ovs_tnl_destroy(vport);
return ERR_PTR(err);
}
return vport;
}
@@ -356,8 +321,7 @@ const struct vport_ops ovs_vxlan_vport_ops = {
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
.get_name = ovs_tnl_get_name,
.get_options = ovs_tnl_get_options,
.set_options = vxlan_set_options,
.get_options = vxlan_get_options,
.send = ovs_tnl_send,
};
#else

View File

@@ -30,6 +30,10 @@ struct vport;
struct vport_parms;
/* The following definitions are for users of the vport subsystem: */
/*
* Tunnel vport pointers kept per network namespace; the GRE code reaches this
* structure as ovs_net->vport_net.  Each pointer is set when the matching
* vport is created, cleared when it is destroyed, and read with
* rcu_dereference() on the GRE receive path.
*/
struct vport_net {
struct vport __rcu *gre_vport; /* GRE vport, or NULL if none exists. */
struct vport __rcu *gre64_vport; /* GRE64 vport, or NULL if none exists. */
};
int ovs_vport_init(void);
void ovs_vport_exit(void);

1
debian/copyright.in vendored
View File

@@ -77,7 +77,6 @@ License:
include/linux/openvswitch.h
include/openvswitch/datapath-compat.h
include/openvswitch/tunnel.h
On Debian systems, the complete text of the GNU General Public License
version 2 can be found in `/usr/share/common-licenses/GPL-2'.

View File

@@ -243,6 +243,16 @@ enum {
#define OVS_PATCH_ATTR_MAX (__OVS_PATCH_ATTR_MAX - 1)
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/
enum {
OVS_TUNNEL_ATTR_UNSPEC,
OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
__OVS_TUNNEL_ATTR_MAX
};
#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)
/* Flows. */
#define OVS_FLOW_FAMILY "ovs_flow"
@@ -286,7 +296,6 @@ enum ovs_key_attr {
#endif
OVS_KEY_ATTR_MPLS = 62, /* struct ovs_key_mpls */
OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
__OVS_KEY_ATTR_MAX
};

View File

@@ -1,5 +1,4 @@
noinst_HEADERS += \
include/openvswitch/datapath-compat.h \
include/openvswitch/tunnel.h \
include/openvswitch/types.h

View File

@@ -1,77 +0,0 @@
/*
* Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
*
* This file is offered under your choice of two licenses: Apache 2.0 or GNU
* GPL 2.0 or later. The permission statements for each of these licenses is
* given below. You may license your modifications to this file under either
* of these licenses or both. If you wish to license your modifications under
* only one of these licenses, delete the permission text for the other
* license.
*
* ----------------------------------------------------------------------
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ----------------------------------------------------------------------
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
* ----------------------------------------------------------------------
*/
#ifndef OPENVSWITCH_TUNNEL_H
#define OPENVSWITCH_TUNNEL_H 1
#include <linux/types.h>
#include <linux/openvswitch.h>
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*
* OVS_TUNNEL_ATTR_DST_IPV4 is required for kernel tunnel ports; all other
* attributes are optional.
* For flow-based tunnels, only OVS_TUNNEL_ATTR_DST_PORT is useful.
*
* The enumerators are numbered implicitly, starting at 0 with
* OVS_TUNNEL_ATTR_UNSPEC, so inserting or reordering entries changes the
* numeric value of every attribute that follows.
*/
enum {
OVS_TUNNEL_ATTR_UNSPEC,
OVS_TUNNEL_ATTR_FLAGS, /* 32-bit TNL_F_*. */
OVS_TUNNEL_ATTR_DST_IPV4, /* Remote IPv4 address. */
OVS_TUNNEL_ATTR_SRC_IPV4, /* Local IPv4 address. */
OVS_TUNNEL_ATTR_OUT_KEY, /* __be64 key to use on output. */
OVS_TUNNEL_ATTR_IN_KEY, /* __be64 key to match on input. */
OVS_TUNNEL_ATTR_TOS, /* 8-bit TOS value. */
OVS_TUNNEL_ATTR_TTL, /* 8-bit TTL value. */
OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by VXLAN. */
__OVS_TUNNEL_ATTR_MAX /* Sentinel: one past the last attribute above. */
};
/* Largest attribute value defined by the enum above (currently
* OVS_TUNNEL_ATTR_DST_PORT). */
#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)
/* TNL_F_* flag bits, carried in the 32-bit OVS_TUNNEL_ATTR_FLAGS attribute.
* Bits 3, 5, 6 and 7 are not assigned to any flag here; the notes below
* record what each of them was previously used for. */
#define TNL_F_CSUM (1 << 0) /* Checksum packets. */
#define TNL_F_TOS_INHERIT (1 << 1) /* Inherit ToS from inner packet. */
#define TNL_F_TTL_INHERIT (1 << 2) /* Inherit TTL from inner packet. */
/* Bit 3 was previously used for Don't Fragment inheritance. */
#define TNL_F_DF_DEFAULT (1 << 4) /* Set DF bit if inherit off or
* not IP. */
/* Bit 5 was previously used for path MTU discovery. */
/* Bit 6 is reserved since it was previously used for Tunnel header caching. */
/* Bit 7 was previously used for IPsec tunnel ports. */
#endif /* openvswitch/tunnel.h */

View File

@@ -47,7 +47,6 @@
#include "odp-util.h"
#include "ofpbuf.h"
#include "openvswitch/datapath-compat.h"
#include "openvswitch/tunnel.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"

View File

@@ -1210,7 +1210,6 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
const struct ovs_key_udp *udp_key;
switch (type) {
case OVS_KEY_ATTR_TUN_ID:
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_TUNNEL:

View File

@@ -329,6 +329,10 @@ set_tunnel_config(struct netdev_dev *dev_, const struct smap *args)
struct in_addr in_addr;
if (lookup_ip(node->value, &in_addr)) {
VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
} else if (ip_is_multicast(in_addr.s_addr)) {
VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed",
name, IP_ARGS(in_addr.s_addr));
return EINVAL;
} else {
tnl_cfg.ip_dst = in_addr.s_addr;
}
@@ -446,14 +450,6 @@ set_tunnel_config(struct netdev_dev *dev_, const struct smap *args)
name, type);
return EINVAL;
}
if (tnl_cfg.ip_src) {
if (ip_is_multicast(tnl_cfg.ip_dst)) {
VLOG_WARN("%s: remote_ip is multicast, ignoring local_ip", name);
tnl_cfg.ip_src = 0;
}
}
if (!tnl_cfg.ttl) {
tnl_cfg.ttl = DEFAULT_TTL;
}

View File

@@ -96,7 +96,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
case OVS_KEY_ATTR_ENCAP: return "encap";
case OVS_KEY_ATTR_PRIORITY: return "skb_priority";
case OVS_KEY_ATTR_SKB_MARK: return "skb_mark";
case OVS_KEY_ATTR_TUN_ID: return "tun_id";
case OVS_KEY_ATTR_TUNNEL: return "tunnel";
case OVS_KEY_ATTR_IN_PORT: return "in_port";
case OVS_KEY_ATTR_ETHERNET: return "eth";
@@ -665,7 +664,6 @@ odp_flow_key_attr_len(uint16_t type)
case OVS_KEY_ATTR_ENCAP: return -2;
case OVS_KEY_ATTR_PRIORITY: return 4;
case OVS_KEY_ATTR_SKB_MARK: return 4;
case OVS_KEY_ATTR_TUN_ID: return 8;
case OVS_KEY_ATTR_TUNNEL: return -2;
case OVS_KEY_ATTR_IN_PORT: return 4;
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
@@ -870,10 +868,6 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
ds_put_format(ds, "(%#"PRIx32")", nl_attr_get_u32(a));
break;
case OVS_KEY_ATTR_TUN_ID:
ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a)));
break;
case OVS_KEY_ATTR_TUNNEL:
memset(&tun_key, 0, sizeof tun_key);
if (tun_key_from_attr(a, &tun_key) == ODP_FIT_ERROR) {
@@ -1120,18 +1114,6 @@ parse_odp_key_attr(const char *s, const struct simap *port_names,
}
}
{
char tun_id_s[32];
int n = -1;
if (sscanf(s, "tun_id(%31[x0123456789abcdefABCDEF])%n",
tun_id_s, &n) > 0 && n > 0) {
uint64_t tun_id = strtoull(tun_id_s, NULL, 0);
nl_msg_put_be64(key, OVS_KEY_ATTR_TUN_ID, htonll(tun_id));
return n;
}
}
{
char tun_id_s[32];
int tos, ttl;
@@ -1538,8 +1520,6 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow,
if (flow->tunnel.ip_dst) {
tun_key_to_attr(buf, &flow->tunnel);
} else if (flow->tunnel.tun_id != htonll(0)) {
nl_msg_put_be64(buf, OVS_KEY_ATTR_TUN_ID, flow->tunnel.tun_id);
}
if (flow->skb_mark) {
@@ -2072,11 +2052,6 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK;
}
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUN_ID)) {
flow->tunnel.tun_id = nl_attr_get_be64(attrs[OVS_KEY_ATTR_TUN_ID]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID;
}
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
enum odp_key_fitness res;
@@ -2209,9 +2184,6 @@ commit_odp_tunnel_action(const struct flow *flow, struct flow *base,
/* A valid IPV4_TUNNEL must have non-zero ip_dst. */
if (flow->tunnel.ip_dst) {
odp_put_tunnel_action(&base->tunnel, odp_actions);
} else {
commit_set_action(odp_actions, OVS_KEY_ATTR_TUN_ID,
&base->tunnel.tun_id, sizeof base->tunnel.tun_id);
}
}

View File

@@ -54,7 +54,6 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
* struct pad nl hdr total
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_TUN_ID 8 -- 4 12
* OVS_KEY_ATTR_TUNNEL 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_ID 8 -- 4 12
* - OVS_TUNNEL_KEY_ATTR_IPV4_SRC 4 -- 4 8
@@ -74,7 +73,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* ----------------------------------------------------------
* total 220
* total 208
*
* We include some slack space in case the calculation isn't quite right or we
* add another field and forget to adjust this value.

View File

@@ -34,8 +34,6 @@
*
* Ability to generate actions on input for ECN
* Ability to generate metadata for packet-outs
* VXLAN.
* Multicast group management (possibly).
* Disallow netdevs with names like "gre64_system" to prevent collisions. */
VLOG_DEFINE_THIS_MODULE(tunnel);
@@ -322,16 +320,13 @@ static struct tnl_port *
tnl_find(struct tnl_match *match_)
{
struct tnl_match match = *match_;
bool is_multicast = ip_is_multicast(match.ip_src);
struct tnl_port *tnl_port;
/* remote_ip, local_ip, in_key */
if (!is_multicast) {
tnl_port = tnl_find_exact(&match);
if (tnl_port) {
return tnl_port;
}
}
/* remote_ip, in_key */
match.ip_src = 0;
@@ -342,47 +337,20 @@ tnl_find(struct tnl_match *match_)
match.ip_src = match_->ip_src;
/* remote_ip, local_ip */
if (!is_multicast) {
match.in_key = 0;
match.in_key_flow = true;
tnl_port = tnl_find_exact(&match);
if (tnl_port) {
return tnl_port;
}
match.in_key = match_->in_key;
match.in_key_flow = false;
}
/* remote_ip */
match.ip_src = 0;
match.in_key = 0;
match.in_key_flow = true;
tnl_port = tnl_find_exact(&match);
if (tnl_port) {
return tnl_port;
}
match.ip_src = match_->ip_src;
match.in_key = match_->in_key;
match.in_key_flow = false;
if (is_multicast) {
match.ip_src = 0;
match.ip_dst = match_->ip_src;
/* multicast remote_ip, in_key */
tnl_port = tnl_find_exact(&match);
if (tnl_port) {
return tnl_port;
}
/* multicast remote_ip */
match.in_key = 0;
match.in_key_flow = true;
tnl_port = tnl_find_exact(&match);
if (tnl_port) {
return tnl_port;
}
}
return NULL;
}

View File

@@ -90,7 +90,6 @@ userspace(pid=9765,slow_path(cfm))
userspace(pid=9765,slow_path(cfm,match))
userspace(pid=9123,userdata=0x815309)
userspace(pid=1234567,userdata(0102030405060708090a0b0c0d0e0f))
set(tun_id(0x7f10354))
set(in_port(2))
set(eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15))
set(eth_type(0x1234))