2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-23 10:28:00 +00:00
ovs/lib/dpif-netlink-rtnl.c

619 lines
19 KiB
C
Raw Normal View History

/*
* Copyright (c) 2017 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "dpif-netlink-rtnl.h"
#include <net/if.h>
#include <linux/ip.h>
#include <linux/rtnetlink.h>
#include "dpif-netlink.h"
#include "netdev-vport.h"
#include "netlink-socket.h"
#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl);
/* On some older systems, these enums are not defined.
 *
 * For each tunnel kind below, IFLA_<KIND>_MAX is forced to 0 when the system
 * headers do not provide it.  The attribute numbers used by this file are then
 * defined locally whenever IFLA_<KIND>_MAX is lower than the highest attribute
 * number this file needs for that kind. */

/* VXLAN: highest attribute used here is IFLA_VXLAN_GPE (27). */
#ifndef IFLA_VXLAN_MAX
#define IFLA_VXLAN_MAX 0
#endif
#if IFLA_VXLAN_MAX < 27
#define IFLA_VXLAN_LEARNING 7
#define IFLA_VXLAN_PORT 15
#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
#define IFLA_VXLAN_GBP 23
#define IFLA_VXLAN_COLLECT_METADATA 25
#define IFLA_VXLAN_GPE 27
#endif
/* GRE: only IFLA_GRE_COLLECT_METADATA (18) is used here. */
#ifndef IFLA_GRE_MAX
#define IFLA_GRE_MAX 0
#endif
#if IFLA_GRE_MAX < 18
#define IFLA_GRE_COLLECT_METADATA 18
#endif
/* Geneve: highest attribute used here is IFLA_GENEVE_UDP_ZERO_CSUM6_RX (10). */
#ifndef IFLA_GENEVE_MAX
#define IFLA_GENEVE_MAX 0
#endif
#if IFLA_GENEVE_MAX < 10
#define IFLA_GENEVE_PORT 5
#define IFLA_GENEVE_COLLECT_METADATA 6
#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
#endif
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
/* Bareudp: same fallback scheme as for the other tunnel kinds.  The highest
 * attribute used in this file is IFLA_BAREUDP_MULTIPROTO_MODE (4). */
#ifndef IFLA_BAREUDP_MAX
#define IFLA_BAREUDP_MAX 0
#endif
#if IFLA_BAREUDP_MAX < 4
#define IFLA_BAREUDP_PORT 1
#define IFLA_BAREUDP_ETHERTYPE 2
#define IFLA_BAREUDP_SRCPORT_MIN 3
#define IFLA_BAREUDP_MULTIPROTO_MODE 4
#endif
/* Value sent as IFLA_BAREUDP_SRCPORT_MIN when a bareudp device is created by
 * dpif_netlink_rtnl_create(). */
#define BAREUDP_SRCPORT_MIN 49153
/* Top-level attributes of an rtnetlink link message that this file parses:
 * only the nested IFLA_LINKINFO container. */
static const struct nl_policy rtlink_policy[] = {
[IFLA_LINKINFO] = { .type = NL_A_NESTED },
};
/* Contents of IFLA_LINKINFO: the link kind string and the nested,
 * kind-specific IFLA_INFO_DATA. */
static const struct nl_policy linkinfo_policy[] = {
[IFLA_INFO_KIND] = { .type = NL_A_STRING },
[IFLA_INFO_DATA] = { .type = NL_A_NESTED },
};
/* VXLAN attributes inspected by dpif_netlink_rtnl_vxlan_verify().  GBP and GPE
 * are flag attributes marked optional; the verifier only requires them when
 * the matching extension bit is set in the tunnel configuration. */
static const struct nl_policy vxlan_policy[] = {
[IFLA_VXLAN_COLLECT_METADATA] = { .type = NL_A_U8 },
[IFLA_VXLAN_LEARNING] = { .type = NL_A_U8 },
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
[IFLA_VXLAN_PORT] = { .type = NL_A_U16 },
[IFLA_VXLAN_GBP] = { .type = NL_A_FLAG, .optional = true },
[IFLA_VXLAN_GPE] = { .type = NL_A_FLAG, .optional = true },
};
/* GRE attributes inspected by dpif_netlink_rtnl_gre_verify(), which is used
 * for the GRE, ERSPAN, IP6ERSPAN and IP6GRE vport types. */
static const struct nl_policy gre_policy[] = {
[IFLA_GRE_COLLECT_METADATA] = { .type = NL_A_FLAG },
};
/* Geneve attributes inspected by dpif_netlink_rtnl_geneve_verify(). */
static const struct nl_policy geneve_policy[] = {
[IFLA_GENEVE_COLLECT_METADATA] = { .type = NL_A_FLAG },
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
[IFLA_GENEVE_PORT] = { .type = NL_A_U16 },
};
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
/* Bareudp attributes inspected by dpif_netlink_rtnl_bareudp_verify().  Only
 * the destination port and the payload ethertype are listed;
 * IFLA_BAREUDP_SRCPORT_MIN and IFLA_BAREUDP_MULTIPROTO_MODE are not part of
 * this policy. */
static const struct nl_policy bareudp_policy[] = {
[IFLA_BAREUDP_PORT] = { .type = NL_A_U16 },
[IFLA_BAREUDP_ETHERTYPE] = { .type = NL_A_U16 },
};
static const char *
vport_type_to_kind(enum ovs_vport_type type,
const struct netdev_tunnel_config *tnl_cfg)
{
switch (type) {
case OVS_VPORT_TYPE_VXLAN:
return "vxlan";
case OVS_VPORT_TYPE_GRE:
if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) {
return "gre";
} else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) {
return "gretap";
} else {
return NULL;
}
case OVS_VPORT_TYPE_GENEVE:
return "geneve";
case OVS_VPORT_TYPE_ERSPAN:
return "erspan";
case OVS_VPORT_TYPE_IP6ERSPAN:
return "ip6erspan";
case OVS_VPORT_TYPE_IP6GRE:
if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) {
return "ip6gretap";
} else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) {
return NULL;
} else {
return NULL;
}
case OVS_VPORT_TYPE_GTPU:
return NULL;
case OVS_VPORT_TYPE_SRV6:
return "srv6";
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
break;
}
return NULL;
}
static int
rtnl_transact(uint32_t type, uint32_t flags, const char *name,
struct ofpbuf **reply)
{
struct ofpbuf request;
int err;
ofpbuf_init(&request, 0);
nl_msg_put_nlmsghdr(&request, 0, type, flags);
ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
nl_msg_put_string(&request, IFLA_IFNAME, name);
err = nl_transact(NETLINK_ROUTE, &request, reply);
ofpbuf_uninit(&request);
return err;
}
/* Sends an acknowledged RTM_DELLINK request for the link called 'name'.
 * Returns the result of the transaction. */
static int
dpif_netlink_rtnl_destroy(const char *name)
{
    const uint32_t nl_flags = NLM_F_REQUEST | NLM_F_ACK;

    return rtnl_transact(RTM_DELLINK, nl_flags, name, NULL);
}
/* Sends an RTM_GETLINK request for the link called 'name', handing 'reply' to
 * the transaction so the caller can inspect the answer.  Returns the result
 * of the transaction. */
static int
dpif_netlink_rtnl_getlink(const char *name, struct ofpbuf **reply)
{
    const uint32_t nl_flags = NLM_F_REQUEST;

    return rtnl_transact(RTM_GETLINK, nl_flags, name, reply);
}
/* Parses the link message in 'reply' down to its kind-specific data.
 *
 * The steps, each of which must succeed, are:
 *   1. parse the top-level attributes after the ifinfomsg (rtlink_policy);
 *   2. parse the nested IFLA_LINKINFO (linkinfo_policy);
 *   3. check that IFLA_INFO_KIND equals 'kind';
 *   4. parse the nested IFLA_INFO_DATA with 'policy' ('policy_size' entries)
 *      into 'tnl_info'.
 *
 * Returns 0 on success, EINVAL if any step fails. */
static int
rtnl_policy_parse(const char *kind, struct ofpbuf *reply,
                  const struct nl_policy *policy,
                  struct nlattr *tnl_info[],
                  size_t policy_size)
{
    struct nlattr *top_attrs[ARRAY_SIZE(rtlink_policy)];
    struct nlattr *info_attrs[ARRAY_SIZE(linkinfo_policy)];
    const size_t attrs_offset = NLMSG_HDRLEN + sizeof(struct ifinfomsg);

    if (!nl_policy_parse(reply, attrs_offset, rtlink_policy, top_attrs,
                         ARRAY_SIZE(rtlink_policy))) {
        return EINVAL;
    }

    if (!nl_parse_nested(top_attrs[IFLA_LINKINFO], linkinfo_policy,
                         info_attrs, ARRAY_SIZE(linkinfo_policy))) {
        return EINVAL;
    }

    if (strcmp(nl_attr_get_string(info_attrs[IFLA_INFO_KIND]), kind) != 0) {
        return EINVAL;
    }

    if (!nl_parse_nested(info_attrs[IFLA_INFO_DATA], policy,
                         tnl_info, policy_size)) {
        return EINVAL;
    }

    return 0;
}
static int
dpif_netlink_rtnl_vxlan_verify(const struct netdev_tunnel_config *tnl_cfg,
const char *kind, struct ofpbuf *reply)
{
struct nlattr *vxlan[ARRAY_SIZE(vxlan_policy)];
int err;
err = rtnl_policy_parse(kind, reply, vxlan_policy, vxlan,
ARRAY_SIZE(vxlan_policy));
if (!err) {
if (0 != nl_attr_get_u8(vxlan[IFLA_VXLAN_LEARNING])
|| 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_COLLECT_METADATA])
|| 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])
|| (tnl_cfg->dst_port
!= nl_attr_get_be16(vxlan[IFLA_VXLAN_PORT]))
|| (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)
&& !nl_attr_get_flag(vxlan[IFLA_VXLAN_GBP]))
|| (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)
&& !nl_attr_get_flag(vxlan[IFLA_VXLAN_GPE]))) {
err = EINVAL;
}
}
return err;
}
/* Checks that the GRE-family link described by 'reply' has link kind 'kind'
 * and carries the IFLA_GRE_COLLECT_METADATA flag.  The tunnel configuration
 * is not consulted.
 *
 * Returns 0 on a match, EINVAL on a parse failure or if the flag is absent. */
static int
dpif_netlink_rtnl_gre_verify(const struct netdev_tunnel_config OVS_UNUSED *tnl,
                             const char *kind, struct ofpbuf *reply)
{
    struct nlattr *attrs[ARRAY_SIZE(gre_policy)];
    int error;

    error = rtnl_policy_parse(kind, reply, gre_policy, attrs,
                              ARRAY_SIZE(gre_policy));
    if (error) {
        return error;
    }

    return nl_attr_get_flag(attrs[IFLA_GRE_COLLECT_METADATA]) ? 0 : EINVAL;
}
static int
dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config *tnl_cfg,
const char *kind, struct ofpbuf *reply)
{
struct nlattr *geneve[ARRAY_SIZE(geneve_policy)];
int err;
err = rtnl_policy_parse(kind, reply, geneve_policy, geneve,
ARRAY_SIZE(geneve_policy));
if (!err) {
if (!nl_attr_get_flag(geneve[IFLA_GENEVE_COLLECT_METADATA])
|| 1 != nl_attr_get_u8(geneve[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])
|| (tnl_cfg->dst_port
!= nl_attr_get_be16(geneve[IFLA_GENEVE_PORT]))) {
err = EINVAL;
}
}
return err;
}
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
static int
dpif_netlink_rtnl_bareudp_verify(const struct netdev_tunnel_config *tnl_cfg,
const char *kind, struct ofpbuf *reply)
{
struct nlattr *bareudp[ARRAY_SIZE(bareudp_policy)];
int err;
err = rtnl_policy_parse(kind, reply, bareudp_policy, bareudp,
ARRAY_SIZE(bareudp_policy));
if (!err) {
if ((tnl_cfg->dst_port != nl_attr_get_be16(bareudp[IFLA_BAREUDP_PORT]))
|| (tnl_cfg->payload_ethertype
!= nl_attr_get_be16(bareudp[IFLA_BAREUDP_ETHERTYPE]))) {
err = EINVAL;
}
}
return err;
}
static int
dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
enum ovs_vport_type type, const char *name)
{
struct ofpbuf *reply;
const char *kind;
int err;
kind = vport_type_to_kind(type, tnl_cfg);
if (!kind) {
return EOPNOTSUPP;
}
err = dpif_netlink_rtnl_getlink(name, &reply);
if (err) {
return err;
}
switch (type) {
case OVS_VPORT_TYPE_VXLAN:
err = dpif_netlink_rtnl_vxlan_verify(tnl_cfg, kind, reply);
break;
case OVS_VPORT_TYPE_GRE:
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
err = dpif_netlink_rtnl_gre_verify(tnl_cfg, kind, reply);
break;
case OVS_VPORT_TYPE_GENEVE:
err = dpif_netlink_rtnl_geneve_verify(tnl_cfg, kind, reply);
break;
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
case OVS_VPORT_TYPE_BAREUDP:
err = dpif_netlink_rtnl_bareudp_verify(tnl_cfg, kind, reply);
break;
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
OVS_NOT_REACHED();
}
ofpbuf_delete(reply);
return err;
}
/* Reuses 'request' (its previous contents are cleared) to send an
 * acknowledged RTM_SETLINK that sets IFLA_MTU to 'mtu' on the link called
 * 'name'.  Returns the result of the transaction. */
static int
rtnl_set_mtu(const char *name, uint32_t mtu, struct ofpbuf *request)
{
    const uint32_t nl_flags = NLM_F_REQUEST | NLM_F_ACK;

    ofpbuf_clear(request);

    nl_msg_put_nlmsghdr(request, 0, RTM_SETLINK, nl_flags);
    ofpbuf_put_zeros(request, sizeof(struct ifinfomsg));
    nl_msg_put_string(request, IFLA_IFNAME, name);
    nl_msg_put_u32(request, IFLA_MTU, mtu);

    return nl_transact(NETLINK_ROUTE, request, NULL);
}
static int
dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
const char *name, enum ovs_vport_type type,
const char *kind, uint32_t flags)
{
enum {
/* For performance, we want to use the largest MTU that the system
* supports. Most existing tunnels will accept UINT16_MAX, treating it
* as the actual max MTU, but some do not. Thus, we use a slightly
* smaller value, that should always be safe yet does not noticeably
* reduce performance. */
MAX_MTU = 65000
};
size_t linkinfo_off, infodata_off;
struct ifinfomsg *ifinfo;
struct ofpbuf request;
int err;
ofpbuf_init(&request, 0);
nl_msg_put_nlmsghdr(&request, 0, RTM_NEWLINK, flags);
ifinfo = ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
nl_msg_put_string(&request, IFLA_IFNAME, name);
nl_msg_put_u32(&request, IFLA_MTU, MAX_MTU);
linkinfo_off = nl_msg_start_nested(&request, IFLA_LINKINFO);
nl_msg_put_string(&request, IFLA_INFO_KIND, kind);
infodata_off = nl_msg_start_nested(&request, IFLA_INFO_DATA);
/* tunnel unique info */
switch (type) {
case OVS_VPORT_TYPE_VXLAN:
nl_msg_put_u8(&request, IFLA_VXLAN_LEARNING, 0);
nl_msg_put_u8(&request, IFLA_VXLAN_COLLECT_METADATA, 1);
nl_msg_put_u8(&request, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
}
if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
nl_msg_put_flag(&request, IFLA_VXLAN_GPE);
}
nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
break;
case OVS_VPORT_TYPE_GRE:
compat: Add ipv6 GRE and IPV6 Tunneling This patch backports upstream ipv6 GRE and tunneling into the OVS OOT (Out of Tree) datapath drivers. The primary reason for this is to support the ERSPAN feature. Because there is no previous history of ipv6 GRE and tunneling it is not possible to exactly reproduce the history of all the files in the patch. The two newly added files - ip6_gre.c and ip6_tunnel.c - are cut from whole cloth out of the upstream Linux 4.15 kernel and then modified as necessary with compatibility layer fixups. These two files already included parts of several other upstream commits that also touched other upstream files. As such, this patch may incorporate parts or all of the following commits: d350a82 net: erspan: create erspan metadata uapi header c69de58 net: erspan: use bitfield instead of mask and offset b423d13 net: erspan: fix use-after-free 214bb1c net: erspan: remove md NULL check afb4c97 ip6_gre: fix potential memory leak in ip6erspan_rcv 50670b6 ip_gre: fix potential memory leak in erspan_rcv a734321 ip6_gre: fix error path when ip6erspan_rcv failed dd8d5b8 ip_gre: fix error path when erspan_rcv failed 293a199 ip6_gre: fix a pontential issue in ip6erspan_rcv d91e8db5 net: erspan: reload pointer after pskb_may_pull ae3e133 net: erspan: fix wrong return value c05fad5 ip_gre: fix wrong return value of erspan_rcv 94d7d8f ip6_gre: add erspan v2 support f551c91 net: erspan: introduce erspan v2 for ip_gre 1d7e2ed net: erspan: refactor existing erspan code ef7baf5 ip6_gre: add ip6 erspan collect_md mode 5a963eb ip6_gre: Add ERSPAN native tunnel support ceaa001 openvswitch: Add erspan tunnel support. 
f192970 ip_gre: check packet length and mtu correctly in erspan tx c84bed4 ip_gre: erspan device should keep dst c122fda ip_gre: set tunnel hlen properly in erspan_tunnel_init 5513d08 ip_gre: check packet length and mtu correctly in erspan_xmit 935a974 ip_gre: get key from session_id correctly in erspan_rcv 1a66a83 gre: add collect_md mode to ERSPAN tunnel 84e54fe gre: introduce native tunnel support for ERSPAN In cases where the listed commits also touched other source code files then the patches are also listed separately within this patch series. Signed-off-by: Greg Rose <gvrose8192@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org> Acked-by: William Tu <u9012063@gmail.com>
2018-03-05 10:11:57 -08:00
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
nl_msg_put_flag(&request, IFLA_GRE_COLLECT_METADATA);
break;
case OVS_VPORT_TYPE_GENEVE:
nl_msg_put_flag(&request, IFLA_GENEVE_COLLECT_METADATA);
nl_msg_put_u8(&request, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1);
nl_msg_put_be16(&request, IFLA_GENEVE_PORT, tnl_cfg->dst_port);
break;
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
case OVS_VPORT_TYPE_BAREUDP:
nl_msg_put_be16(&request, IFLA_BAREUDP_ETHERTYPE,
tnl_cfg->payload_ethertype);
nl_msg_put_u16(&request, IFLA_BAREUDP_SRCPORT_MIN,
BAREUDP_SRCPORT_MIN);
nl_msg_put_be16(&request, IFLA_BAREUDP_PORT, tnl_cfg->dst_port);
if (tnl_cfg->exts & (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE)) {
nl_msg_put_flag(&request, IFLA_BAREUDP_MULTIPROTO_MODE);
}
break;
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
err = EOPNOTSUPP;
goto exit;
}
nl_msg_end_nested(&request, infodata_off);
nl_msg_end_nested(&request, linkinfo_off);
err = nl_transact(NETLINK_ROUTE, &request, NULL);
if (!err && (type == OVS_VPORT_TYPE_GRE ||
type == OVS_VPORT_TYPE_IP6GRE)) {
/* Work around a bug in kernel GRE driver, which ignores IFLA_MTU in
* RTM_NEWLINK, by setting the MTU again. See
* https://bugzilla.redhat.com/show_bug.cgi?id=1488484.
*
* In case of MAX_MTU exceeds hw max MTU, retry a smaller value. */
int err2 = rtnl_set_mtu(name, MAX_MTU, &request);
if (err2) {
err2 = rtnl_set_mtu(name, 1450, &request);
}
if (err2) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "setting MTU of tunnel %s failed (%s)",
name, ovs_strerror(err2));
}
}
exit:
ofpbuf_uninit(&request);
return err;
}
int
dpif_netlink_rtnl_port_create(struct netdev *netdev)
{
const struct netdev_tunnel_config *tnl_cfg;
char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
enum ovs_vport_type type;
const char *name;
const char *kind;
uint32_t flags;
int err;
type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
tnl_cfg = netdev_get_tunnel_config(netdev);
if (!tnl_cfg) {
return EOPNOTSUPP;
}
kind = vport_type_to_kind(type, tnl_cfg);
if (!kind) {
return EOPNOTSUPP;
}
name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
/* If the device exists, validate and/or attempt to recreate it. */
if (err == EEXIST) {
err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
if (!err) {
return 0;
}
err = dpif_netlink_rtnl_destroy(name);
if (err) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "RTNL device %s exists and cannot be "
"deleted: %s", name, ovs_strerror(err));
return err;
}
err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
}
if (err) {
return err;
}
err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
if (err) {
int err2 = dpif_netlink_rtnl_destroy(name);
if (err2) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "Failed to delete device %s during rtnl port "
"creation: %s", name, ovs_strerror(err2));
}
}
return err;
}
int
dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
{
switch (netdev_to_ovs_vport_type(type)) {
case OVS_VPORT_TYPE_VXLAN:
case OVS_VPORT_TYPE_GRE:
case OVS_VPORT_TYPE_GENEVE:
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
case OVS_VPORT_TYPE_SRV6:
tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Acked-By: Greg Rose <gvrose8192@gmail.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-12-17 12:48:41 +05:30
case OVS_VPORT_TYPE_BAREUDP:
return dpif_netlink_rtnl_destroy(name);
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
return EOPNOTSUPP;
}
return 0;
}
/**
* Probe for whether the modules are out-of-tree (openvswitch) or in-tree
* (upstream kernel).
*
* We probe for "ovs_geneve" via rtnetlink. As long as this returns something
* other than EOPNOTSUPP we know that the module in use is the out-of-tree one.
* This will be used to determine which netlink interface to use when creating
* ports; rtnetlink or compat/genetlink.
*
* See ovs_tunnels_out_of_tree
*/
bool
dpif_netlink_rtnl_probe_oot_tunnels(void)
{
char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
struct netdev *netdev = NULL;
bool out_of_tree = false;
const char *name;
int error;
error = netdev_open("ovs-system-probe", "geneve", &netdev);
if (!error) {
struct ofpbuf *reply;
const struct netdev_tunnel_config *tnl_cfg;
tnl_cfg = netdev_get_tunnel_config(netdev);
if (!tnl_cfg) {
netdev_close(netdev);
return true;
}
name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
/* The geneve module exists when ovs-vswitchd crashes
* and restarts, handle the case here.
*/
error = dpif_netlink_rtnl_getlink(name, &reply);
if (!error) {
struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
const char *kind;
if (!nl_policy_parse(reply,
NLMSG_HDRLEN + sizeof(struct ifinfomsg),
rtlink_policy, rtlink,
ARRAY_SIZE(rtlink_policy))
|| !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
linkinfo, ARRAY_SIZE(linkinfo_policy))) {
VLOG_ABORT("Error fetching Geneve tunnel device %s "
"linkinfo", name);
}
kind = nl_attr_get_string(linkinfo[IFLA_INFO_KIND]);
if (!strcmp(kind, "ovs_geneve")) {
out_of_tree = true;
} else if (!strcmp(kind, "geneve")) {
out_of_tree = false;
} else {
VLOG_ABORT("Geneve tunnel device %s with kind %s"
" not supported", name, kind);
}
ofpbuf_delete(reply);
netdev_close(netdev);
return out_of_tree;
}
error = dpif_netlink_rtnl_create(tnl_cfg, name, OVS_VPORT_TYPE_GENEVE,
"ovs_geneve",
(NLM_F_REQUEST | NLM_F_ACK
| NLM_F_CREATE));
if (error != EOPNOTSUPP) {
if (!error) {
dpif_netlink_rtnl_destroy(name);
}
out_of_tree = true;
}
netdev_close(netdev);
}
return out_of_tree;
}