From d7e77143fbdc855ce949a3f23e6dc7be5a6e0cd9 Mon Sep 17 00:00:00 2001 From: Mike Pattrick Date: Fri, 5 Jul 2024 16:45:01 -0400 Subject: [PATCH] tunnel: Allow UDP zero checksum with IPv6 tunnels. This patch adopts the proposed RFC 6935 by allowing null UDP checksums even if the tunnel protocol is IPv6. This is already supported by Linux through the udp6zerocsumtx tunnel option. It is disabled by default and IPv6 tunnels are flagged as requiring a checksum, but this patch enables the user to set csum=false on IPv6 tunnels. Acked-by: Simon Horman Signed-off-by: Mike Pattrick Signed-off-by: Ilya Maximets --- NEWS | 11 +++++++++++ lib/netdev-native-tnl.c | 2 +- lib/netdev-vport.c | 17 +++++++++++++++-- lib/netdev.h | 18 +++++++++++++++++- ofproto/tunnel.c | 10 ++++++++-- tests/tunnel-push-pop-ipv6.at | 9 +++++++++ tests/tunnel-push-pop.at | 7 +++++++ tests/tunnel.at | 2 +- vswitchd/vswitch.xml | 12 +++++++++--- 9 files changed, 78 insertions(+), 10 deletions(-) diff --git a/NEWS b/NEWS index d18693315..bd97ea3c5 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,10 @@ Post-v3.3.0 * Conntrack now supports 'random' flag for selecting ports in a range while natting and 'persistent' flag for selection of the IP address from a range. + * IPv6 UDP tunnel encapsulation including Geneve and VXLAN will now + honour the csum option. Configuring the interface with + "options:csum=false" now has the same effect as the udp6zerocsumtx + option has with Linux kernel UDP tunnels. - The primary development branch has been renamed from 'master' to 'main'. The OVS tree remains hosted on GitHub. https://github.com/openvswitch/ovs.git @@ -23,6 +27,13 @@ Post-v3.3.0 * Added custom transaction support to the Idl via add_op(). * Added support for different output formats like 'json' to Python's unixctl classes. + - Tunnels: + * Previously the kernel datapath did not enable UDP checksums by default + in IPv6 tunnels. This behaviour is non-standard, differs from the + Linux kernel, and as also different than the userspace datapath. Now + these tunnels will calculate checksums by default and that behaviour can + be changed with "options:csum=false" just as with the userspace + datapath. v3.3.0 - 16 Feb 2024 diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index 0f9f07f44..16c56608d 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -384,7 +384,7 @@ udp_build_header(const struct netdev_tunnel_config *tnl_cfg, udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP, 0); udp->udp_dst = tnl_cfg->dst_port; - if (params->is_ipv6 || params->flow->tunnel.flags & FLOW_TNL_F_CSUM) { + if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) { /* Write a value in now to mark that we should compute the checksum * later. 0xffff is handy because it is transparent to the * calculation. */ diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 60caa02fb..234a4ebe1 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -702,7 +702,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) tnl_cfg.dst_port = htons(atoi(node->value)); } else if (!strcmp(node->key, "csum") && has_csum) { if (!strcmp(node->value, "true")) { - tnl_cfg.csum = true; + tnl_cfg.csum = NETDEV_TNL_CSUM_ENABLED; + } else if (!strcmp(node->value, "false")) { + tnl_cfg.csum = NETDEV_TNL_CSUM_DISABLED; } } else if (!strcmp(node->key, "seq") && has_seq) { if (!strcmp(node->value, "true")) { @@ -850,6 +852,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) } } + /* The default csum state for GRE is special as it does have an optional + * checksum but the default configuration isn't correlated with IP version + * like UDP tunnels are. Likewise, tunnels with no checksum at all must be + * in this state. */ + if (tnl_cfg.csum == NETDEV_TNL_CSUM_DEFAULT && + (!has_csum || strstr(type, "gre"))) { + tnl_cfg.csum = NETDEV_TNL_DEFAULT_NO_CSUM; + } + enum tunnel_layers layers = tunnel_supported_layers(type, &tnl_cfg); const char *full_type = (strcmp(type, "vxlan") ? type : (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE) @@ -1026,8 +1037,10 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) } } - if (tnl_cfg->csum) { + if (tnl_cfg->csum == NETDEV_TNL_CSUM_ENABLED) { smap_add(args, "csum", "true"); + } else if (tnl_cfg->csum == NETDEV_TNL_CSUM_DISABLED) { + smap_add(args, "csum", "false"); } if (tnl_cfg->set_seq) { diff --git a/lib/netdev.h b/lib/netdev.h index 67a8486bd..63e03d72d 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -111,6 +111,22 @@ enum netdev_srv6_flowlabel { SRV6_FLOWLABEL_COMPUTE, }; +enum netdev_tnl_csum { + /* Default value for UDP tunnels if no configurations is present. Enforce + * checksum calculation in IPv6 tunnels, disable in IPv4 tunnels. */ + NETDEV_TNL_CSUM_DEFAULT = 0, + + /* Checksum explicitly to be calculated. */ + NETDEV_TNL_CSUM_ENABLED, + + /* Checksum calculation explicitly disabled. */ + NETDEV_TNL_CSUM_DISABLED, + + /* A value for when there is no checksum or the default value is no + * checksum regardless of IP version. */ + NETDEV_TNL_DEFAULT_NO_CSUM, +}; + /* Configuration specific to tunnels. */ struct netdev_tunnel_config { ovs_be64 in_key; @@ -139,7 +155,7 @@ struct netdev_tunnel_config { uint8_t tos; bool tos_inherit; - bool csum; + enum netdev_tnl_csum csum; bool dont_fragment; enum netdev_pt_mode pt_mode; diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 80ddee78a..f067a6c26 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -465,9 +465,13 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow, flow->tunnel.flags &= ~(FLOW_TNL_F_MASK & ~FLOW_TNL_PUB_F_MASK); flow->tunnel.flags |= (cfg->dont_fragment ? FLOW_TNL_F_DONT_FRAGMENT : 0) - | (cfg->csum ? FLOW_TNL_F_CSUM : 0) | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0); + if (cfg->csum == NETDEV_TNL_CSUM_ENABLED || + (cfg->csum == NETDEV_TNL_CSUM_DEFAULT && !flow->tunnel.ip_dst)) { + flow->tunnel.flags |= FLOW_TNL_F_CSUM; + } + if (cfg->set_egress_pkt_mark) { flow->pkt_mark = cfg->egress_pkt_mark; wc->masks.pkt_mark = UINT32_MAX; @@ -706,8 +710,10 @@ tnl_port_format(const struct tnl_port *tnl_port, struct ds *ds) ds_put_cstr(ds, ", df=false"); } - if (cfg->csum) { + if (cfg->csum == NETDEV_TNL_CSUM_ENABLED) { ds_put_cstr(ds, ", csum=true"); + } else if (cfg->csum == NETDEV_TNL_CSUM_DISABLED) { + ds_put_cstr(ds, ", csum=false"); } ds_put_cstr(ds, ")\n"); diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at index f1c5d42f6..3edec5fbc 100644 --- a/tests/tunnel-push-pop-ipv6.at +++ b/tests/tunnel-push-pop-ipv6.at @@ -610,6 +610,15 @@ AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl 2001:cafe::93 f8:bc:12:44:34:b7 br0 ]) +dnl Disable checksum from VXLAN port. +AT_CHECK([ovs-vsctl set Interface t3 options:csum=false]) +AT_CHECK([ovs-ofctl del-flows int-br]) +AT_CHECK([ovs-ofctl add-flow int-br action=4]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b7,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::93,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),out_port(100)),1 +]) + ovs-appctl time/warp 10000 AT_CHECK([ovs-vsctl del-port int-br t3 \ diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 508737c53..7ec4c31ab 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -642,6 +642,13 @@ AT_CHECK([tail -1 stdout], [0], [Datapath actions: tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1 ]) +dnl Check VXLAN tunnel push with checksum. +AT_CHECK([ovs-vsctl set Interface t2 options:csum=true]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1 +]) + AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl 1.1.2.92 f8:bc:12:44:34:b6 br0 1.1.2.93 f8:bc:12:44:34:b7 br0 diff --git a/tests/tunnel.at b/tests/tunnel.at index 9d539ee6f..31e935901 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -1038,7 +1038,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0,src=1.1.1.1,dst=1.1.1.2,ttl=64),in_port(4789)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(ipv6_dst=2001:cafe::1,ttl=64,tp_dst=4789,flags(df))),4789 + [Datapath actions: set(tunnel(ipv6_dst=2001:cafe::1,ttl=64,tp_dst=4789,flags(df|csum))),4789 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,ipv6_src=2001:cafe::1,ipv6_dst=2001:cafe::2,ttl=64),in_port(4789)'], [0], [stdout]) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index e3afb78a4..7f2d188be 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3207,9 +3207,15 @@

Optional. Compute encapsulation header (either GRE or UDP) - checksums on outgoing packets. Default is disabled, set to - true to enable. Checksums present on incoming - packets will be validated regardless of this setting. + checksums on outgoing packets. When unset (the default value), + checksum computing for outgoing packets is enabled for UDP IPv6 + tunnels, and disabled for GRE and IPv4 UDP tunnels. When set to + false, no checksums will be computed for outgoing + tunnel encapsulation headers. When true, checksums + will be computed for all outgoing tunnel encapsulation headers. + Checksums present on incoming packets will be validated + regardless of this setting. Incoming packets without a checksum + will also be accepted regardless of this setting.