2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-15 14:17:18 +00:00

datapath: Enable OVS GSO to be used up to 3.18 if necessary.

There are two important GSO tunnel features that were introduced
after the 3.12 cutoff for our current out-of-tree GSO implementation:
 * 3.16 introduced support for outer UDP checksums.
 * 3.18 introduced support for verifying hardware support for protocols
   other than VXLAN.

In cases where these features are used, we should use OVS GSO to
ensure correct behavior. However, we also want to continue to use
kernel GSO or hardware TSO in existing situations. Therefore, this
extends the range of kernels where OVS GSO is available to 3.18 and
makes it easier to select which implementation to use.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
This commit is contained in:
Jesse Gross
2015-02-18 14:27:17 -08:00
parent cabd55169e
commit 9ffdbf4119
10 changed files with 165 additions and 170 deletions

View File

@@ -365,9 +365,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [netlink_has_listeners(net->genl_sock],
[OVS_DEFINE([HAVE_GENL_HAS_LISTENERS_TAKES_NET])])
OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [gre_cisco_register])
OVS_GREP_IFELSE([$KSRC/include/net/gre.h], [gre_handle_offloads])
OVS_GREP_IFELSE([$KSRC/include/net/ip_tunnels.h], [iptunnel_xmit.*net],
[OVS_DEFINE([HAVE_IPTUNNEL_XMIT_NET])])
OVS_GREP_IFELSE([$KSRC/include/net/ipv6.h], [IP6_FH_F_SKIP_RH])
OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_get_be16])
OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_put_be16])
@@ -388,7 +385,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/linux/openvswitch.h], [openvswitch_handle_frame_hook],
[OVS_DEFINE([HAVE_RHEL_OVS_HOOK])])
OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [vxlan_xmit_skb])
OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [struct vxlan_metadata],
[OVS_DEFINE([HAVE_VXLAN_METADATA])])
OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_flow_src_port],

View File

@@ -268,9 +268,9 @@ int gre_cisco_unregister(struct gre_cisco_protocol *proto)
#endif /* !HAVE_GRE_CISCO_REGISTER */
#ifndef USE_KERNEL_TUNNEL_API
/* GRE TX side. */
/* No-op segment fixup callback: used where no per-segment adjustment
 * of the GRE header is required.  Deliberately empty. */
static void gre_nop_fix(struct sk_buff *skb)
{
(void)skb;
}
static void gre_csum_fix(struct sk_buff *skb)
{
struct gre_base_hdr *greh;
@@ -287,15 +287,15 @@ static void gre_csum_fix(struct sk_buff *skb)
struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
{
int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
gso_fix_segment_t fix_segment;
if (gre_csum)
fix_segment = gre_csum_fix;
else
fix_segment = NULL;
fix_segment = gre_nop_fix;
skb_reset_inner_headers(skb);
return ovs_iptunnel_handle_offloads(skb, gre_csum, fix_segment);
return ovs_iptunnel_handle_offloads(skb, gre_csum, type, fix_segment);
}
static bool is_gre_gso(struct sk_buff *skb)
@@ -332,7 +332,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
}
}
}
#endif
#endif /* CONFIG_NET_IPGRE_DEMUX */

View File

@@ -167,7 +167,9 @@ drop:
kfree_skb(skb);
return err;
}
#endif /* 3.16 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
static __be16 __skb_network_protocol(struct sk_buff *skb)
{
__be16 type = skb->protocol;
@@ -190,16 +192,6 @@ static __be16 __skb_network_protocol(struct sk_buff *skb)
return type;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
/* Kernels < 3.12: run the per-tunnel segment fixup callback cached in
 * the skb control block (set by ovs_iptunnel_handle_offloads()), if one
 * was registered. */
static void tnl_fix_segment(struct sk_buff *skb)
{
if (OVS_GSO_CB(skb)->fix_segment)
OVS_GSO_CB(skb)->fix_segment(skb);
}
#else
/* Kernels >= 3.12: no compat per-segment fixup is needed; keep a no-op
 * so callers compile unchanged. */
static void tnl_fix_segment(struct sk_buff *skb) { }
#endif
static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
netdev_features_t features,
bool tx_path)
@@ -240,7 +232,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
memcpy(ip_hdr(skb), iph, pkt_hlen);
memcpy(skb->cb, cb, sizeof(cb));
tnl_fix_segment(skb);
OVS_GSO_CB(skb)->fix_segment(skb);
skb->protocol = proto;
skb = skb->next;
@@ -250,11 +242,29 @@ free:
return segs;
}
/* Transmit one IPv4 skb via the kernel's real ip_local_out().
 * Returns NETDEV_TX_OK on success, or the ip_local_out() error code when
 * net_xmit_eval() reports the transmit as a failure. */
static int output_ip(struct sk_buff *skb)
{
int ret = NETDEV_TX_OK;
int err;
/* Clear any stale IP control-block state before handing off. */
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* ip_local_out is #defined to rpl_ip_local_out by this compat layer;
 * undo that here so we call the kernel's own implementation. */
#undef ip_local_out
err = ip_local_out(skb);
if (unlikely(net_xmit_eval(err)))
ret = err;
return ret;
}
int rpl_ip_local_out(struct sk_buff *skb)
{
int ret = NETDEV_TX_OK;
int id = -1;
if (!OVS_GSO_CB(skb)->fix_segment)
return output_ip(skb);
if (skb_is_gso(skb)) {
struct iphdr *iph;
@@ -274,7 +284,6 @@ int rpl_ip_local_out(struct sk_buff *skb)
while (skb) {
struct sk_buff *next_skb = skb->next;
struct iphdr *iph;
int err;
skb->next = NULL;
@@ -282,67 +291,9 @@ int rpl_ip_local_out(struct sk_buff *skb)
if (id >= 0)
iph->id = htons(id++);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
#undef ip_local_out
err = ip_local_out(skb);
if (unlikely(net_xmit_eval(err)))
ret = err;
ret = output_ip(skb);
skb = next_skb;
}
return ret;
}
#endif /* 3.16 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) || \
!defined USE_UPSTREAM_VXLAN
/* Prepare a tunnelled skb for transmit on kernels lacking the upstream
 * iptunnel_handle_offloads():
 *  - GSO skb: reject packets that are already encapsulated (-ENOSYS),
 *    otherwise record @fix_segment in the skb control block for the OVS
 *    GSO segmentation path to apply later.
 *  - non-GSO skb: resolve a pending CHECKSUM_PARTIAL in software when
 *    @csum_help is set; otherwise normalize the state to CHECKSUM_NONE.
 * Returns @skb on success; frees @skb and returns ERR_PTR(err) on error.
 */
struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help,
void (*fix_segment)(struct sk_buff *))
{
int err;
/* XXX: synchronize inner header reset for compat and non compat code
* so that we can do it here.
*/
/*
skb_reset_inner_headers(skb);
*/
/* OVS compat code does not maintain encapsulation bit.
* skb->encapsulation = 1; */
if (skb_is_gso(skb)) {
/* Nested encapsulation cannot be segmented by this compat path. */
if (skb_is_encapsulated(skb)) {
err = -ENOSYS;
goto error;
}
OVS_GSO_CB(skb)->fix_segment = fix_segment;
return skb;
}
/* If packet is not gso and we are resolving any partial checksum,
* clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
* on the outer header without confusing devices that implement
* NETIF_F_IP_CSUM with encapsulation.
*/
/*
if (csum_help)
skb->encapsulation = 0;
*/
if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
err = skb_checksum_help(skb);
if (unlikely(err))
goto error;
} else if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_NONE;
return skb;
error:
kfree_skb(skb);
return ERR_PTR(err);
}
#endif /* 3.12 || !USE_UPSTREAM_VXLAN */
#endif /* 3.18 */

View File

@@ -2,8 +2,7 @@
#define __LINUX_GSO_WRAPPER_H
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) || \
!defined USE_UPSTREAM_VXLAN
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -15,75 +14,55 @@ typedef void (*gso_fix_segment_t)(struct sk_buff *);
struct ovs_gso_cb {
struct ovs_skb_cb dp_cb;
gso_fix_segment_t fix_segment;
sk_buff_data_t inner_mac_header; /* Offset from skb->head */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
__be16 inner_protocol;
#endif
u16 inner_network_header; /* Offset from
* inner_mac_header */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
unsigned int inner_mac_header;
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
unsigned int inner_network_header;
#endif
};
#define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
#define skb_inner_network_header rpl_skb_inner_network_header
#endif
#ifdef NET_SKBUFF_DATA_USES_OFFSET
#define skb_inner_mac_header rpl_skb_inner_mac_header
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
/* Compat accessor: resolve the inner MAC header pointer from the offset
 * (relative to skb->head) cached in the OVS GSO control block. */
static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
{
return skb->head + OVS_GSO_CB(skb)->inner_mac_header;
}
#else
#define skb_inner_mac_header rpl_skb_inner_mac_header
static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
static inline void skb_set_inner_mac_header(const struct sk_buff *skb,
int offset)
{
return OVS_GSO_CB(skb)->inner_mac_header;
OVS_GSO_CB(skb)->inner_mac_header = (skb->data - skb->head) + offset;
}
#endif
#define skb_inner_network_header rpl_skb_inner_network_header
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
{
return skb_inner_mac_header(skb) +
OVS_GSO_CB(skb)->inner_network_header;
return skb->head + OVS_GSO_CB(skb)->inner_network_header;
}
#define skb_inner_network_offset rpl_skb_inner_network_offset
/* Offset of the inner network header from the current skb->data. */
static inline int skb_inner_network_offset(const struct sk_buff *skb)
{
return skb_inner_network_header(skb) - skb->data;
}
#define skb_reset_inner_headers rpl_skb_reset_inner_headers
static inline void skb_reset_inner_headers(struct sk_buff *skb)
static inline void skb_set_inner_network_header(const struct sk_buff *skb,
int offset)
{
BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb));
OVS_GSO_CB(skb)->inner_network_header = skb->network_header -
skb->mac_header;
OVS_GSO_CB(skb)->inner_mac_header = skb->mac_header;
OVS_GSO_CB(skb)->fix_segment = NULL;
OVS_GSO_CB(skb)->inner_network_header = (skb->data - skb->head)
+ offset;
}
struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help,
gso_fix_segment_t fix_segment);
#endif /* 3.12 || !USE_UPSTREAM_VXLAN */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
#define ip_local_out rpl_ip_local_out
int ip_local_out(struct sk_buff *skb);
#define skb_inner_mac_offset rpl_skb_inner_mac_offset
static inline int skb_inner_mac_offset(const struct sk_buff *skb)
{
return skb_inner_mac_header(skb) - skb->data;
}
#endif /* 3.16 */
/* Intentionally a no-op: the compat ovs_gso_cb does not carry an inner
 * transport header field, so there is nothing to record here. */
static inline void skb_set_inner_transport_header(const struct sk_buff *skb,
int offset)
{ }
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) {
@@ -128,4 +107,24 @@ static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
return skb->inner_protocol;
}
#endif /* 3.11 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
#define ip_local_out rpl_ip_local_out
int ip_local_out(struct sk_buff *skb);
/* Offset of the inner MAC header from the current skb->data. */
static inline int skb_inner_mac_offset(const struct sk_buff *skb)
{
return skb_inner_mac_header(skb) - skb->data;
}
#define skb_reset_inner_headers rpl_skb_reset_inner_headers
/* Snapshot the current MAC/network/transport header offsets as the
 * "inner" headers in the OVS GSO control block — done before outer
 * tunnel headers are pushed, so they can be restored per segment. */
static inline void skb_reset_inner_headers(struct sk_buff *skb)
{
/* The compat control block must still fit inside skb->cb. */
BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb));
skb_set_inner_mac_header(skb, skb_mac_header(skb) - skb->data);
skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
}
#endif /* 3.18 */
#endif

View File

@@ -6,6 +6,14 @@
#include <linux/jhash.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
#define SKB_GSO_GRE 0
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
#define SKB_GSO_GRE_CSUM 0
#endif
#ifndef HAVE_IGNORE_DF_RENAME
#define ignore_df local_df
#endif

View File

@@ -81,7 +81,7 @@ static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) */
#endif /* HAVE_GRE_CISCO_REGISTER */
#ifndef USE_KERNEL_TUNNEL_API
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
#define gre_build_header rpl_gre_build_header
void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,

View File

@@ -2,34 +2,12 @@
#define __NET_IP_TUNNELS_WRAPPER_H 1
#include <linux/version.h>
#if defined(HAVE_GRE_HANDLE_OFFLOADS) && \
LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) && \
defined(HAVE_VXLAN_XMIT_SKB)
/* RHEL6 and RHEL7 both has backported tunnel API but RHEL6 has
* older version, so avoid using RHEL6 backports.
*/
#define USE_KERNEL_TUNNEL_API
#endif
#ifdef USE_KERNEL_TUNNEL_API
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
#include_next <net/ip_tunnels.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0)
/* Adapt iptunnel_xmit() across kernel variants: when
 * HAVE_IPTUNNEL_XMIT_NET is set the kernel's version takes an extra
 * leading argument (passed as NULL here) and no xnet flag; otherwise it
 * takes the trailing xnet flag.  Presents one signature to OVS callers. */
static inline int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt,
struct sk_buff *skb, __be32 src,
__be32 dst, __u8 proto, __u8 tos,
__u8 ttl, __be16 df, bool xnet)
{
#ifdef HAVE_IPTUNNEL_XMIT_NET
return iptunnel_xmit(NULL, rt, skb, src, dst, proto, tos, ttl, df);
#else
return iptunnel_xmit(rt, skb, src, dst, proto, tos, ttl, df, xnet);
#endif
}
#define iptunnel_xmit rpl_iptunnel_xmit
#endif
#else
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
#include <linux/if_tunnel.h>
#include <linux/netdevice.h>
@@ -41,6 +19,26 @@ static inline int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt,
#include <net/ip.h>
#include <net/rtnetlink.h>
struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help, int gso_type_mask,
void (*fix_segment)(struct sk_buff *));
#define iptunnel_xmit rpl_iptunnel_xmit
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
__be16 df, bool xnet);
#define iptunnel_pull_header rpl_iptunnel_pull_header
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
#else
#define ovs_iptunnel_handle_offloads(skb, csum_help, gso_type_mask, fix_segment) \
iptunnel_handle_offloads(skb, csum_help, gso_type_mask)
#endif /* 3.18 */
#ifndef TUNNEL_CSUM
#define TUNNEL_CSUM __cpu_to_be16(0x01)
#define TUNNEL_ROUTING __cpu_to_be16(0x02)
#define TUNNEL_KEY __cpu_to_be16(0x04)
@@ -49,7 +47,6 @@ static inline int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt,
#define TUNNEL_REC __cpu_to_be16(0x20)
#define TUNNEL_VERSION __cpu_to_be16(0x40)
#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
struct tnl_ptk_info {
__be16 flags;
@@ -60,14 +57,10 @@ struct tnl_ptk_info {
#define PACKET_RCVD 0
#define PACKET_REJECT 1
#endif
int iptunnel_xmit(struct sock *sk, struct rtable *rt,
struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet);
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
#ifndef TUNNEL_DONT_FRAGMENT
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#endif
#ifndef TUNNEL_OAM

View File

@@ -8,7 +8,9 @@
#include <linux/version.h>
#ifdef USE_KERNEL_TUNNEL_API
#ifdef HAVE_VXLAN_METADATA
#define USE_UPSTREAM_VXLAN
#include_next <net/vxlan.h>
#endif
@@ -81,9 +83,7 @@ struct vxlanhdr_gbp {
#define VXLAN_F_RCV_FLAGS VXLAN_F_GBP
#endif
#ifdef HAVE_VXLAN_METADATA
#define USE_UPSTREAM_VXLAN
#ifdef USE_UPSTREAM_VXLAN
static inline int rpl_vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
@@ -100,7 +100,7 @@ static inline int rpl_vxlan_xmit_skb(struct vxlan_sock *vs,
}
#define vxlan_xmit_skb rpl_vxlan_xmit_skb
#else /* HAVE_VXLAN_METADATA */
#else /* USE_UPSTREAM_VXLAN */
struct vxlan_metadata {
__be32 vni;

View File

@@ -35,11 +35,10 @@
#include "compat.h"
#include "gso.h"
#ifndef USE_KERNEL_TUNNEL_API
int iptunnel_xmit(struct sock *sk, struct rtable *rt,
struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
int rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
__be16 df, bool xnet)
{
int pkt_len = skb->len;
struct iphdr *iph;
@@ -82,6 +81,58 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt,
return pkt_len;
}
/* Prepare a tunnelled skb for transmit (compat replacement for the
 * upstream iptunnel_handle_offloads()):
 *  - records the inner headers / encapsulation state when the packet is
 *    not already encapsulated, rejecting nested-encap GSO (-ENOSYS);
 *  - GSO skb: tags shinfo with @gso_type_mask for kernel segmentation;
 *  - non-GSO skb: resolves a pending CHECKSUM_PARTIAL in software when
 *    @csum_help is set, otherwise normalizes to CHECKSUM_NONE.
 * Returns @skb on success; frees @skb and returns ERR_PTR(err) on error.
 */
struct sk_buff *ovs_iptunnel_handle_offloads(struct sk_buff *skb,
bool csum_help, int gso_type_mask,
void (*fix_segment)(struct sk_buff *))
{
int err;
if (likely(!skb_is_encapsulated(skb))) {
skb_reset_inner_headers(skb);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
skb->encapsulation = 1;
#endif
} else if (skb_is_gso(skb)) {
err = -ENOSYS;
goto error;
}
/* NOTE(review): a nonzero gso_type_mask appears to mean the kernel GSO
 * path will segment this tunnel type itself, so the compat per-segment
 * fixup is dropped — confirm against the callers' selection logic. */
if (gso_type_mask)
fix_segment = NULL;
OVS_GSO_CB(skb)->fix_segment = fix_segment;
if (skb_is_gso(skb)) {
/* Ensure a private shinfo before editing gso_type. */
err = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(err))
goto error;
skb_shinfo(skb)->gso_type |= gso_type_mask;
return skb;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
/* If packet is not gso and we are resolving any partial checksum,
* clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
* on the outer header without confusing devices that implement
* NETIF_F_IP_CSUM with encapsulation.
*/
if (csum_help)
skb->encapsulation = 0;
#endif
if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
err = skb_checksum_help(skb);
if (unlikely(err))
goto error;
} else if (skb->ip_summed != CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_NONE;
return skb;
error:
kfree_skb(skb);
return ERR_PTR(err);
}
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
{
if (unlikely(!pskb_may_pull(skb, hdr_len)))

View File

@@ -62,13 +62,11 @@
#include "gso.h"
#include "vlan.h"
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
/* VXLAN protocol header */
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
};
#endif
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)