diff --git a/NEWS b/NEWS index dddd57fc2..33adec490 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,9 @@ Post-v2.15.0 - ovsdb-tool: * New option '--election-timer' to the 'create-cluster' command to set the leader election timer during cluster creation. + - OVS now reports the datapath capability 'ct_zero_snat', which reflects + whether the SNAT with all-zero IP address is supported. + See ovs-vswitchd.conf.db(5) for details. v2.15.0 - 15 Feb 2021 diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index 6a5ba052d..cfc2315e3 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, dpif, tp_id, dl_type, nw_proto, tp_name, is_generic) : EOPNOTSUPP); } + +int +ct_dpif_get_features(struct dpif *dpif, enum ct_features *features) +{ + return (dpif->dpif_class->ct_get_features + ? dpif->dpif_class->ct_get_features(dpif, features) + : EOPNOTSUPP); +} diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index 88f4c7e28..b59cba962 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy { * timeout attribute values */ }; +/* Conntrack Features. */ +enum ct_features { + CONNTRACK_F_ZERO_SNAT = 1 << 0, /* All-zero SNAT support. */ +}; + int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, const uint16_t *zone, int *); int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *); @@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, char **tp_name, bool *is_generic); +int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features); #endif /* CT_DPIF_H */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 26218ad72..ce51e97b8 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -8577,6 +8577,7 @@ const struct dpif_class dpif_netdev_class = { NULL, /* ct_timeout_policy_dump_next */ NULL, /* ct_timeout_policy_dump_done */ dpif_netdev_ct_get_timeout_policy_name, + NULL, /* ct_get_features */ dpif_netdev_ipf_set_enabled, dpif_netdev_ipf_set_min_frag, dpif_netdev_ipf_set_max_nfrags, diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 73d5608a8..39dc8300e 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -3165,6 +3165,20 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, return 0; } +static int +dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED, + enum ct_features *features) +{ + if (features != NULL) { +#ifndef _WIN32 + *features = CONNTRACK_F_ZERO_SNAT; +#else + *features = 0; +#endif + } + return 0; +} + #define CT_DPIF_NL_TP_TCP_MAPPINGS \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ @@ -4007,6 +4021,7 @@ const struct dpif_class dpif_netlink_class = { dpif_netlink_ct_timeout_policy_dump_next, dpif_netlink_ct_timeout_policy_dump_done, dpif_netlink_ct_get_timeout_policy_name, + dpif_netlink_ct_get_features, NULL, /* ipf_set_enabled */ NULL, /* ipf_set_min_frag */ NULL, /* ipf_set_max_nfrags */ diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index b817fceac..59e0a3a9d 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -81,6 +81,7 @@ struct ct_dpif_dump_state; struct ct_dpif_entry; struct ct_dpif_tuple; struct ct_dpif_timeout_policy; +enum ct_features; /* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in * sync with 'ipf_proto_status' and 'ipf_status', but more @@ -562,6 +563,10 @@ struct dpif_class { uint16_t dl_type, uint8_t nw_proto, char **tp_name, bool *is_generic); + /* Stores the conntrack features supported by 'dpif' into features. + * The value is a bitmap of CONNTRACK_F_* bits. */ + int (*ct_get_features)(struct dpif *, enum ct_features *features); + /* IP Fragmentation. */ /* Disables or enables conntrack fragment reassembly. The default diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml index 1f6f30685..f285350eb 100644 --- a/lib/ovs-actions.xml +++ b/lib/ovs-actions.xml @@ -2132,6 +2132,16 @@ for i in [1,n_members]: connection, will behave the same as a bare nat.

+

+ For SNAT, there is a special case when the src IP + address is configured as all 0's, i.e., + nat(src=0.0.0.0). In this case, when a source port + collision is detected during the commit, the source port will be + translated to an ephemeral port. If there is no collision, no SNAT + is performed. Note that this is currently only implemented in the + Linux kernel datapath. +

+

Open vSwitch 2.6 introduced nat. Linux 4.6 was the earliest upstream kernel that implemented ct support for diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 47db9bb57..5ce56adfa 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer) return !error; } +/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */ +static bool +dpif_supports_ct_zero_snat(struct dpif_backer *backer) +{ + enum ct_features features; + bool supported = false; + + if (!ct_dpif_get_features(backer->dpif, &features)) { + if (features & CONNTRACK_F_ZERO_SNAT) { + supported = true; + } + } + VLOG_INFO("%s: Datapath %s ct_zero_snat", + dpif_name(backer->dpif), (supported) ? "supports" + : "does not support"); + return supported; +} + /* Tests whether 'backer''s datapath supports the * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ static bool @@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer) backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); backer->rt_support.explicit_drop_action = dpif_supports_explicit_drop_action(backer->dpif); - backer->rt_support.lb_output_action= + backer->rt_support.lb_output_action = dpif_supports_lb_output_action(backer->dpif); + backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer); /* Flow fields. */ backer->rt_support.odp.ct_state = check_ct_state(backer); @@ -5605,6 +5624,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap) smap_add(cap, "explicit_drop_action", s.explicit_drop_action ? "true" :"false"); smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false"); + smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false"); } /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index b41c3d82a..191cfcb0d 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *, DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \ \ /* True if the datapath supports balance_tcp optimization */ \ - DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode") + DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\ + \ + /* True if the datapath supports all-zero IP SNAT. */ \ + DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT") /* Stores the various features which the corresponding backer supports. */ diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 15628a7c6..86d633ac4 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -99,6 +99,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_ZEROIP_SNAT() +# +# Perform requirements checks for running conntrack all-zero IP SNAT tests. +# The kernel always supports all-zero IP SNAT, so no check is needed. +# However, the Windows datapath using the same netlink interface does not. +# +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], +[ + AT_SKIP_IF([test "$IS_WIN32" = "yes"]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. diff --git a/tests/system-traffic.at b/tests/system-traffic.at index c73bbc420..6a15d08c2 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -4487,6 +4487,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([conntrack - all-zero IP SNAT]) +AT_SKIP_IF([test $HAVE_NC = no]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_ZEROIP_SNAT() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) + +OVS_START_L7([at_ns1], [http]) + +AT_DATA([flows.txt], [dnl +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0) +table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10) +table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10) +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10) +table=0,priority=10,arp,action=normal +table=0,priority=1,action=drop +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat) +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20) +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1 +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2 +]) +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +dnl - Test to make sure src nat is NOT done when not needed +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log]) +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT) +]) + +dnl - Test to make sure src nat is done when needed +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log]) +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log]) +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=/g' | sort], [0], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + + AT_SETUP([conntrack - simple DNAT]) CHECK_CONNTRACK() CHECK_CONNTRACK_NAT() diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index 34f82cee3..9f0d38dfb 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_ZEROIP_SNAT() +# +# Perform requirements checks for running conntrack all-zero IP SNAT tests. +# The userspace datapath does not support all-zero IP SNAT. +# +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], +[ + AT_SKIP_IF([:]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 3522b2497..e8c0b02ef 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -6181,6 +6181,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ True if the datapath supports OVS_ACTION_ATTR_DROP. If false, explicit drop action will not be sent to the datapath. + + True if the datapath supports all-zero SNAT. This is a special case + if the src IP address is configured as all 0's, i.e., + nat(src=0.0.0.0). In this case, when a source port + collision is detected during the commit, the source port will be + translated to an ephemeral port. If there is no collision, no SNAT + is performed. +