2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

datapath: Add Stateless TCP Tunneling protocol.

The Stateless TCP Tunnel (STT) protocol encapsulates traffic in
IPv4/TCP packets.
STT uses the TCP segmentation offload available in most NICs. On
packet xmit the STT driver appends an STT header along with a TCP header
to the packet. For a GSO packet, the GSO parameters are set according
to the tunnel configuration and the packet is handed over to the networking
stack. This allows use of the segmentation offload available in NICs.

The protocol is documented at
http://www.ietf.org/archive/id/draft-davie-stt-06.txt

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
This commit is contained in:
Pravin B Shelar 2015-04-09 20:12:32 -07:00
parent a51a50862a
commit 4237026e52
14 changed files with 1920 additions and 12 deletions

1
FAQ.md
View File

@ -218,6 +218,7 @@ A: Support for tunnels was added to the upstream Linux kernel module
| VXLAN | 3.12
| Geneve | 3.18
| LISP | <not upstream>
| STT | <not upstream>
If you are using a version of the kernel that is older than the one
listed above, it is still possible to use that tunnel protocol. However,

1
NEWS
View File

@ -80,6 +80,7 @@ Post-v2.3.0
- The kernel vports with dependencies are no longer part of the overall
openvswitch.ko but built and loaded automatically as individual kernel
modules (vport-*.ko).
- Support for STT tunneling.
v2.3.0 - 14 Aug 2014

View File

@ -9,6 +9,7 @@ both_modules = \
vport_geneve \
vport_gre \
vport_lisp \
vport_stt \
vport_vxlan
# When changing the name of 'build_modules', please also update the
# print-build-modules in Makefile.am.
@ -30,6 +31,7 @@ vport_geneve_sources = vport-geneve.c
vport_vxlan_sources = vport-vxlan.c
vport_gre_sources = vport-gre.c
vport_lisp_sources = vport-lisp.c
vport_stt_sources = vport-stt.c
openvswitch_headers = \
compat.h \

View File

@ -35,6 +35,7 @@
/random32.c
/reciprocal_div.c
/skbuff-openvswitch.c
/stt.c
/table.c
/time.c
/tmp
@ -50,6 +51,7 @@
/vport-lisp.c
/vport-netdev.c
/vport-patch.c
/vport-stt.c
/vport-vxlan.c
/vport.c
/vxlan.c

View File

@ -12,6 +12,7 @@ openvswitch_sources += \
linux/compat/net_namespace.c \
linux/compat/reciprocal_div.c \
linux/compat/skbuff-openvswitch.c \
linux/compat/stt.c \
linux/compat/udp.c \
linux/compat/udp_tunnel.c \
linux/compat/vxlan.c \
@ -75,6 +76,7 @@ openvswitch_headers += \
linux/compat/include/net/udp.h \
linux/compat/include/net/udp_tunnel.h \
linux/compat/include/net/sock.h \
linux/compat/include/net/stt.h \
linux/compat/include/net/vxlan.h \
linux/compat/include/net/sctp/checksum.h
EXTRA_DIST += linux/compat/build-aux/export-check-whitelist

View File

@ -26,6 +26,15 @@ struct ovs_gso_cb {
};
#define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
static inline void skb_clear_ovs_gso_cb(struct sk_buff *skb)
{
OVS_GSO_CB(skb)->fix_segment = NULL;
}
#else
/* Fallback for the #else branch: there is nothing to clear in this
 * configuration, so the function is intentionally a no-op.  It exists so
 * callers do not need their own preprocessor conditionals.
 */
static inline void skb_clear_ovs_gso_cb(struct sk_buff *skb)
{
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)

View File

@ -229,6 +229,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */
OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */
OVS_VPORT_TYPE_STT = 106, /* STT tunnel */
__OVS_VPORT_TYPE_MAX
};

View File

@ -0,0 +1,71 @@
#ifndef __NET_STT_H
#define __NET_STT_H 1
#include <linux/kconfig.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) && IS_ENABLED(CONFIG_NETFILTER)
#include <net/ip_tunnels.h>
#define OVS_STT
/* STT header.  stt_hdr() locates it immediately after a struct tcphdr at
 * the skb's transport header.  The multi-byte fields are declared
 * big-endian (__be16/__be64).  @key is the 64-bit value the STT vport
 * reads as the tunnel key on receive.
 * NOTE(review): the exact meaning of version/flags/l4_offset/mss/vlan_tci
 * is defined by the STT draft and the code in stt.c - confirm there.
 */
struct stthdr {
__u8 version;
__u8 flags;
__u8 l4_offset;
__u8 reserved;
__be16 mss;
__be16 vlan_tci;
__be64 key;
};
/* Padding after the end of the tunnel headers to provide alignment
 * for inner packet IP header after 14 byte Ethernet header.
 */
#define STT_ETH_PAD 2
/* Size of the STT header including the alignment padding above. */
#define STT_BASE_HLEN (sizeof(struct stthdr) + STT_ETH_PAD)
/* STT_BASE_HLEN plus the size of the struct tcphdr that precedes it. */
#define STT_HEADER_LEN (sizeof(struct tcphdr) + STT_BASE_HLEN)
/* Return a pointer to the STT header of @skb, i.e. the bytes that start
 * sizeof(struct tcphdr) past the skb's transport header.  No bounds or
 * validity checks are performed here.
 */
static inline struct stthdr *stt_hdr(const struct sk_buff *skb)
{
	unsigned char *l4 = skb_transport_header(skb);

	return (struct stthdr *)(l4 + sizeof(struct tcphdr));
}
/* Forward declaration so the receive-callback type below can name it. */
struct stt_sock;
/* Signature of the per-socket receive callback stored in stt_sock->rcv. */
typedef void (stt_rcv_t)(struct stt_sock *stt_sock, struct sk_buff *skb);
/* @list: Per-net list of STT ports.
 * @rcv: The callback is called on STT packet recv, STT reassembly can generate
 * multiple packets, in this case first packet has tunnel outer header, rest
 * of the packets are inner packet segments with no stt header.
 * @rcv_data: user data.
 * @sock: Fake TCP socket for the STT port.
 * @rcu: RCU callback head.  NOTE(review): presumably used to defer freeing
 * of the structure until readers are done - confirm in stt.c.
 */
struct stt_sock {
struct list_head list;
stt_rcv_t *rcv;
void *rcv_data;
struct socket *sock;
struct rcu_head rcu;
};
/* Public STT entry points.  Callers use the unprefixed names; each macro
 * maps that name onto the rpl_/ovs_ prefixed symbol declared right after it.
 */

/* Create an STT socket for @port in @net; @rcv is stored as the receive
 * callback and @data as its user data (see struct stt_sock).  The STT vport
 * checks the result with IS_ERR(), so failures are reported as error
 * pointers rather than NULL.
 */
#define stt_sock_add rpl_stt_sock_add
struct stt_sock *rpl_stt_sock_add(struct net *net, __be16 port,
stt_rcv_t *rcv, void *data);
/* Release a socket obtained from stt_sock_add(). */
#define stt_sock_release rpl_stt_sock_release
void rpl_stt_sock_release(struct stt_sock *stt_sock);
/* Transmit @skb over route @rt using the given outer IPv4/TCP parameters
 * and tunnel ID.  NOTE(review): return-value convention and skb ownership
 * are defined in stt.c - confirm there.
 */
#define stt_xmit_skb rpl_stt_xmit_skb
int rpl_stt_xmit_skb(struct sk_buff *skb, struct rtable *rt,
__be32 src, __be32 dst, __u8 tos,
__u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
__be64 tun_id);
/* Module-wide set-up; returns 0 on success (the STT vport module aborts
 * its own init on a non-zero result).
 */
#define stt_init_module ovs_stt_init_module
int ovs_stt_init_module(void);
/* Counterpart of stt_init_module(). */
#define stt_cleanup_module ovs_stt_cleanup_module
void ovs_stt_cleanup_module(void);
#endif
#endif /* __NET_STT_H */

1550
datapath/linux/compat/stt.c Normal file

File diff suppressed because it is too large Load Diff

231
datapath/vport-stt.c Normal file
View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2015 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/stt.h>
#include <net/udp.h>
#include "datapath.h"
#include "vport.h"
#ifdef OVS_STT
/* Forward declaration: stt_tnl_create() needs the address of the ops table,
 * which is defined after the functions it points to.
 */
static struct vport_ops ovs_stt_vport_ops;
/**
 * struct stt_port - private data of an STT vport
 * @stt_sock: The socket created for this port number.
 * @name: vport name, copied from the creation parameters and returned by
 * the get_name operation.
 */
struct stt_port {
struct stt_sock *stt_sock;
char name[IFNAMSIZ];
};
/* Return the STT-specific private area of @vport (see vport_priv()). */
static inline struct stt_port *stt_vport(const struct vport *vport)
{
	struct stt_port *priv = vport_priv(vport);

	return priv;
}
static void stt_rcv(struct stt_sock *stt_sock, struct sk_buff *skb)
{
struct vport *vport = stt_sock->rcv_data;
struct stthdr *stth = stt_hdr(skb);
struct ovs_tunnel_info tun_info;
struct sk_buff *next;
ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
get_unaligned(&stth->key),
TUNNEL_KEY | TUNNEL_CSUM,
NULL, 0);
do {
next = skb->next;
skb->next = NULL;
ovs_vport_receive(vport, skb, &tun_info);
} while ((skb = next));
}
static int stt_tnl_get_options(const struct vport *vport,
struct sk_buff *skb)
{
struct stt_port *stt_port = stt_vport(vport);
struct inet_sock *sk = inet_sk(stt_port->stt_sock->sock->sk);
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport)))
return -EMSGSIZE;
return 0;
}
static void stt_tnl_destroy(struct vport *vport)
{
struct stt_port *stt_port = stt_vport(vport);
stt_sock_release(stt_port->stt_sock);
ovs_vport_deferred_free(vport);
}
/* Create an STT vport.
 *
 * @parms: creation parameters; parms->options must contain a 16-bit
 * OVS_TUNNEL_ATTR_DST_PORT attribute (host byte order) naming the port the
 * STT socket is created for.
 *
 * Returns the new vport, or an ERR_PTR(): -EINVAL when the options or the
 * destination port attribute are missing or malformed, otherwise the error
 * reported by ovs_vport_alloc() or stt_sock_add().
 */
static struct vport *stt_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct stt_port *stt_port;
	struct stt_sock *stt_sock;
	struct vport *vport;
	struct nlattr *a;
	u16 dst_port;

	if (!options)
		return ERR_PTR(-EINVAL);

	/* Require destination port from userspace. */
	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (!a || nla_len(a) != sizeof(u16))
		return ERR_PTR(-EINVAL);
	dst_port = nla_get_u16(a);

	vport = ovs_vport_alloc(sizeof(struct stt_port),
				&ovs_stt_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	stt_port = stt_vport(vport);

	/* strncpy() does not terminate the destination when the source fills
	 * it completely; the name is later returned as a C string, so bound
	 * the copy and terminate explicitly.
	 */
	strncpy(stt_port->name, parms->name, sizeof(stt_port->name) - 1);
	stt_port->name[sizeof(stt_port->name) - 1] = '\0';

	stt_sock = stt_sock_add(net, htons(dst_port), stt_rcv, vport);
	if (IS_ERR(stt_sock)) {
		ovs_vport_free(vport);
		return ERR_CAST(stt_sock);
	}
	stt_port->stt_sock = stt_sock;

	return vport;
}
/* Transmit @skb through the STT tunnel described by its egress tunnel info.
 *
 * The outer route is looked up for IPPROTO_TCP using the tunnel key's
 * addresses and TOS plus the skb mark.  The source port comes from
 * udp_flow_src_port(); the destination port is the STT socket's inet_sport.
 *
 * Returns the result of stt_xmit_skb().  On a missing tunnel info (-EINVAL)
 * or a failed route lookup (the lookup's error) the skb is freed here.
 */
static int stt_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->egress_tun_info;
	struct sock *sk = stt_vport(vport)->stt_sock->sock->sk;
	struct net *net = ovs_dp_get_net(vport->dp);
	const struct ovs_key_ipv4_tunnel *key;
	__be16 dport = inet_sk(sk)->inet_sport;
	struct rtable *rt;
	__be16 sport;
	__be32 saddr;
	__be16 df;

	if (unlikely(!tun_info)) {
		kfree_skb(skb);
		return -EINVAL;
	}
	key = &tun_info->tunnel;

	/* Route lookup */
	saddr = key->ipv4_src;
	rt = find_route(net, &saddr, key->ipv4_dst, IPPROTO_TCP,
			key->ipv4_tos, skb->mark);
	if (IS_ERR(rt)) {
		kfree_skb(skb);
		return PTR_ERR(rt);
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	else
		df = 0;

	sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
	skb->ignore_df = 1;

	return stt_xmit_skb(skb, rt, saddr, key->ipv4_dst,
			    key->ipv4_tos, key->ipv4_ttl,
			    df, sport, dport, key->tun_id);
}
static const char *stt_tnl_get_name(const struct vport *vport)
{
return stt_vport(vport)->name;
}
static int stt_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct ovs_tunnel_info *egress_tun_info)
{
struct stt_port *stt_port = stt_vport(vport);
struct net *net = ovs_dp_get_net(vport->dp);
__be16 dport = inet_sk(stt_port->stt_sock->sock->sk)->inet_sport;
__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
/* Get tp_src and tp_dst, refert to stt_build_header().
*/
return ovs_tunnel_get_egress_info(egress_tun_info,
ovs_dp_get_net(vport->dp),
OVS_CB(skb)->egress_tun_info,
IPPROTO_UDP, skb->mark, sport, dport);
}
/* Operations table for STT vports.  It is registered in ovs_stt_tnl_init()
 * and its address is passed to ovs_vport_alloc() when a port is created.
 */
static struct vport_ops ovs_stt_vport_ops = {
.type = OVS_VPORT_TYPE_STT,
.create = stt_tnl_create,
.destroy = stt_tnl_destroy,
.get_name = stt_tnl_get_name,
.get_options = stt_tnl_get_options,
.send = stt_tnl_send,
.get_egress_tun_info = stt_get_egress_tun_info,
.owner = THIS_MODULE,
};
/* Module entry point: initialise the STT layer, then register the STT
 * vport operations.  If registration fails the STT layer is cleaned up
 * again before the error is returned.
 */
static int __init ovs_stt_tnl_init(void)
{
	int err = stt_init_module();

	if (err)
		return err;

	err = ovs_vport_ops_register(&ovs_stt_vport_ops);
	if (!err)
		return 0;

	stt_cleanup_module();
	return err;
}
/* Module exit: undo ovs_stt_tnl_init() in reverse order - unregister the
 * vport operations first, then clean up the STT layer.
 */
static void __exit ovs_stt_tnl_exit(void)
{
ovs_vport_ops_unregister(&ovs_stt_vport_ops);
stt_cleanup_module();
}
module_init(ovs_stt_tnl_init);
module_exit(ovs_stt_tnl_exit);
MODULE_DESCRIPTION("OVS: STT switching port");
MODULE_LICENSE("GPL");
/* 106 is the numeric value of OVS_VPORT_TYPE_STT. */
MODULE_ALIAS("vport-type-106");
#endif

View File

@ -768,6 +768,9 @@ get_vport_type(const struct dpif_netlink_vport *vport)
case OVS_VPORT_TYPE_LISP:
return "lisp";
case OVS_VPORT_TYPE_STT:
return "stt";
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
break;
@ -787,6 +790,8 @@ netdev_to_ovs_vport_type(const struct netdev *netdev)
return OVS_VPORT_TYPE_NETDEV;
} else if (!strcmp(type, "internal")) {
return OVS_VPORT_TYPE_INTERNAL;
} else if (strstr(type, "stt")) {
return OVS_VPORT_TYPE_STT;
} else if (!strcmp(type, "geneve")) {
return OVS_VPORT_TYPE_GENEVE;
} else if (strstr(type, "gre64")) {

View File

@ -55,6 +55,7 @@ static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
#define GENEVE_DST_PORT 6081
#define VXLAN_DST_PORT 4789
#define LISP_DST_PORT 4341
#define STT_DST_PORT 7471
#define VXLAN_HLEN (sizeof(struct eth_header) + \
sizeof(struct ip_header) + \
@ -158,7 +159,7 @@ netdev_vport_needs_dst_port(const struct netdev *dev)
return (class->get_config == get_tunnel_config &&
(!strcmp("geneve", type) || !strcmp("vxlan", type) ||
!strcmp("lisp", type)));
!strcmp("lisp", type) || !strcmp("stt", type)) );
}
const char *
@ -257,6 +258,8 @@ netdev_vport_construct(struct netdev *netdev_)
dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
} else if (!strcmp(type, "lisp")) {
dev->tnl_cfg.dst_port = htons(LISP_DST_PORT);
} else if (!strcmp(type, "stt")) {
dev->tnl_cfg.dst_port = htons(STT_DST_PORT);
}
return 0;
@ -432,7 +435,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
struct smap_node *node;
has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
strstr(type, "vxlan");
strstr(type, "stt") || strstr(type, "vxlan");
ipsec_mech_set = false;
memset(&tnl_cfg, 0, sizeof tnl_cfg);
@ -449,6 +452,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
tnl_cfg.dst_port = htons(LISP_DST_PORT);
}
if (!strcmp(type, "stt")) {
tnl_cfg.dst_port = htons(STT_DST_PORT);
}
needs_dst_port = netdev_vport_needs_dst_port(dev_);
tnl_cfg.ipsec = strstr(type, "ipsec");
tnl_cfg.dont_fragment = true;
@ -688,7 +695,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
(!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
(!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
(!strcmp("lisp", type) && dst_port != LISP_DST_PORT) ||
(!strcmp("stt", type) && dst_port != STT_DST_PORT)) {
smap_add_format(args, "dst_port", "%d", dst_port);
}
}
@ -1401,7 +1409,8 @@ netdev_vport_tunnel_register(void)
TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
push_udp_header,
netdev_vxlan_pop_header),
TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL)
TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL),
TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL),
};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

View File

@ -48,8 +48,8 @@ static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
* used to indicate the type of tunnel (0x01 = VxLAN, 0x02 = GRE) and the three
* least significant bytes hold the value of the layer 2 overlay network
* segment identifier: a 24-bit VxLAN tunnel's VNI or a 24-bit GRE tunnel's
* TNI. This is not compatible with GRE-64, as implemented in OVS, as its
* tunnel IDs are 64-bit.
* TNI. This is not compatible with GRE-64 or STT, as implemented in OVS, as
* their tunnel IDs are 64-bit.
*
* Two new enterprise information elements are defined which are similar to
* layer2SegmentId but support 64-bit IDs:
@ -64,6 +64,7 @@ enum dpif_ipfix_tunnel_type {
DPIF_IPFIX_TUNNEL_VXLAN = 0x01,
DPIF_IPFIX_TUNNEL_GRE = 0x02,
DPIF_IPFIX_TUNNEL_LISP = 0x03,
DPIF_IPFIX_TUNNEL_STT = 0x04,
DPIF_IPFIX_TUNNEL_IPSEC_GRE = 0x05,
DPIF_IPFIX_TUNNEL_GENEVE = 0x07,
NUM_DPIF_IPFIX_TUNNEL
@ -299,7 +300,7 @@ static uint8_t tunnel_protocol[NUM_DPIF_IPFIX_TUNNEL] = {
IPPROTO_UDP, /* DPIF_IPFIX_TUNNEL_VXLAN */
IPPROTO_GRE, /* DPIF_IPFIX_TUNNEL_GRE */
IPPROTO_UDP, /* DPIF_IPFIX_TUNNEL_LISP*/
0 , /* reserved */
IPPROTO_TCP, /* DPIF_IPFIX_TUNNEL_STT*/
IPPROTO_GRE, /* DPIF_IPFIX_TUNNEL_IPSEC_GRE */
0 , /* reserved */
IPPROTO_UDP, /* DPIF_IPFIX_TUNNEL_GENEVE*/
@ -353,6 +354,7 @@ BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_ip) == 32);
* VxLAN: 24-bit VNI,
* GRE: 32- or 64-bit key,
* LISP: 24-bit instance ID,
* STT: 64-bit key
*/
#define MAX_TUNNEL_KEY_LEN 8
@ -607,6 +609,9 @@ dpif_ipfix_add_tunnel_port(struct dpif_ipfix *di, struct ofport *ofport,
} else if (strcmp(type, "geneve") == 0) {
dip->tunnel_type = DPIF_IPFIX_TUNNEL_GENEVE;
dip->tunnel_key_length = 3;
} else if (strcmp(type, "stt") == 0) {
dip->tunnel_type = DPIF_IPFIX_TUNNEL_STT;
dip->tunnel_key_length = 8;
} else {
free(dip);
goto out;

View File

@ -1892,6 +1892,25 @@
</p>
</dd>
<dt><code>stt</code></dt>
<dd>
The Stateless TCP Tunnel (STT) is particularly useful when tunnel
endpoints are in end-systems, as it utilizes the capabilities of
standard network interface cards to improve performance. STT utilizes
a TCP-like header inside the IP header. It is stateless, i.e., there is
no TCP connection state of any kind associated with the tunnel. The
TCP-like header is used to leverage the capabilities of existing
network interface cards, but should not be interpreted as implying
any sort of connection state between endpoints.
Because the STT protocol does not engage in the usual TCP 3-way handshake,
it will have difficulty traversing stateful firewalls.
The protocol is documented at
http://www.ietf.org/archive/id/draft-davie-stt-06.txt
All traffic uses a default destination port of 7471. STT is only
available in kernel datapath on kernel 3.5 or newer.
</dd>
<dt><code>patch</code></dt>
<dd>
A pair of virtual devices that act as a patch cable.
@ -1909,7 +1928,7 @@
These options apply to interfaces with <ref column="type"/> of
<code>geneve</code>, <code>gre</code>, <code>ipsec_gre</code>,
<code>gre64</code>, <code>ipsec_gre64</code>, <code>vxlan</code>,
and <code>lisp</code>.
<code>lisp</code> and <code>stt</code>.
</p>
<p>
@ -1998,8 +2017,8 @@
</li>
<li>
A positive 24-bit (for Geneve, VXLAN, and LISP), 32-bit (for GRE)
or 64-bit (for GRE64) number. The tunnel receives only packets
with the specified key.
or 64-bit (for GRE64 and STT) number. The tunnel receives only
packets with the specified key.
</li>
<li>
The word <code>flow</code>. The tunnel accepts packets with any
@ -2025,8 +2044,8 @@
</li>
<li>
A positive 24-bit (for Geneve, VXLAN and LISP), 32-bit (for GRE) or
64-bit (for GRE64) number. Packets sent through the tunnel will
have the specified key.
64-bit (for GRE64 and STT) number. Packets sent through the tunnel
will have the specified key.
</li>
<li>
The word <code>flow</code>. Packets sent through the tunnel will