mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 18:07:40 +00:00
conntrack: Document all-zero IP SNAT behavior and add a test case.
Currently, conntrack in the kernel has an undocumented feature referred to as all-zero IP address SNAT. Basically, when a source port collision is detected during the commit, the source port will be translated to an ephemeral port. If there is no collision, no SNAT is performed. This patchset documents this behavior and adds a self-test to verify it's not changing. In addition, a datapath feature flag is added for the all-zero IP SNAT case. This will help applications on top of OVS, like OVN, to determine whether this feature can be used. Signed-off-by: Eelco Chaudron <echaudro@redhat.com> Acked-by: Aaron Conole <aconole@redhat.com> Acked-by: Dumitru Ceara <dceara@redhat.com> Acked-by: Alin-Gabriel Serdean <aserdean@ovn.org> Acked-by: Paolo Valerio <pvalerio@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
parent
355fef6f2c
commit
e6ad4d8d9c
3
NEWS
3
NEWS
@ -32,6 +32,9 @@ Post-v2.15.0
|
|||||||
- ovsdb-tool:
|
- ovsdb-tool:
|
||||||
* New option '--election-timer' to the 'create-cluster' command to set the
|
* New option '--election-timer' to the 'create-cluster' command to set the
|
||||||
leader election timer during cluster creation.
|
leader election timer during cluster creation.
|
||||||
|
- OVS now reports the datapath capability 'ct_zero_snat', which reflects
|
||||||
|
whether SNAT with an all-zero IP address is supported.
|
||||||
|
See ovs-vswitchd.conf.db(5) for details.
|
||||||
|
|
||||||
|
|
||||||
v2.15.0 - 15 Feb 2021
|
v2.15.0 - 15 Feb 2021
|
||||||
|
@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
|
|||||||
dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
|
dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
|
||||||
: EOPNOTSUPP);
|
: EOPNOTSUPP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ct_dpif_get_features(struct dpif *dpif, enum ct_features *features)
|
||||||
|
{
|
||||||
|
return (dpif->dpif_class->ct_get_features
|
||||||
|
? dpif->dpif_class->ct_get_features(dpif, features)
|
||||||
|
: EOPNOTSUPP);
|
||||||
|
}
|
||||||
|
@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy {
|
|||||||
* timeout attribute values */
|
* timeout attribute values */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Conntrack features that a datapath can report.  Each enumerator occupies
 * its own bit so that several features can be combined into one bitmap. */
enum ct_features {
    /* All-zero SNAT support. */
    CONNTRACK_F_ZERO_SNAT = 1 << 0,
};
|
||||||
|
|
||||||
int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
|
int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
|
||||||
const uint16_t *zone, int *);
|
const uint16_t *zone, int *);
|
||||||
int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
|
int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
|
||||||
@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
|
|||||||
int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
|
int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
|
||||||
uint16_t dl_type, uint8_t nw_proto,
|
uint16_t dl_type, uint8_t nw_proto,
|
||||||
char **tp_name, bool *is_generic);
|
char **tp_name, bool *is_generic);
|
||||||
|
int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features);
|
||||||
|
|
||||||
#endif /* CT_DPIF_H */
|
#endif /* CT_DPIF_H */
|
||||||
|
@ -8577,6 +8577,7 @@ const struct dpif_class dpif_netdev_class = {
|
|||||||
NULL, /* ct_timeout_policy_dump_next */
|
NULL, /* ct_timeout_policy_dump_next */
|
||||||
NULL, /* ct_timeout_policy_dump_done */
|
NULL, /* ct_timeout_policy_dump_done */
|
||||||
dpif_netdev_ct_get_timeout_policy_name,
|
dpif_netdev_ct_get_timeout_policy_name,
|
||||||
|
NULL, /* ct_get_features */
|
||||||
dpif_netdev_ipf_set_enabled,
|
dpif_netdev_ipf_set_enabled,
|
||||||
dpif_netdev_ipf_set_min_frag,
|
dpif_netdev_ipf_set_min_frag,
|
||||||
dpif_netdev_ipf_set_max_nfrags,
|
dpif_netdev_ipf_set_max_nfrags,
|
||||||
|
@ -3165,6 +3165,20 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
|
||||||
|
enum ct_features *features)
|
||||||
|
{
|
||||||
|
if (features != NULL) {
|
||||||
|
#ifndef _WIN32
|
||||||
|
*features = CONNTRACK_F_ZERO_SNAT;
|
||||||
|
#else
|
||||||
|
*features = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#define CT_DPIF_NL_TP_TCP_MAPPINGS \
|
#define CT_DPIF_NL_TP_TCP_MAPPINGS \
|
||||||
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \
|
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \
|
||||||
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \
|
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \
|
||||||
@ -4007,6 +4021,7 @@ const struct dpif_class dpif_netlink_class = {
|
|||||||
dpif_netlink_ct_timeout_policy_dump_next,
|
dpif_netlink_ct_timeout_policy_dump_next,
|
||||||
dpif_netlink_ct_timeout_policy_dump_done,
|
dpif_netlink_ct_timeout_policy_dump_done,
|
||||||
dpif_netlink_ct_get_timeout_policy_name,
|
dpif_netlink_ct_get_timeout_policy_name,
|
||||||
|
dpif_netlink_ct_get_features,
|
||||||
NULL, /* ipf_set_enabled */
|
NULL, /* ipf_set_enabled */
|
||||||
NULL, /* ipf_set_min_frag */
|
NULL, /* ipf_set_min_frag */
|
||||||
NULL, /* ipf_set_max_nfrags */
|
NULL, /* ipf_set_max_nfrags */
|
||||||
|
@ -81,6 +81,7 @@ struct ct_dpif_dump_state;
|
|||||||
struct ct_dpif_entry;
|
struct ct_dpif_entry;
|
||||||
struct ct_dpif_tuple;
|
struct ct_dpif_tuple;
|
||||||
struct ct_dpif_timeout_policy;
|
struct ct_dpif_timeout_policy;
|
||||||
|
enum ct_features;
|
||||||
|
|
||||||
/* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in
|
/* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in
|
||||||
* sync with 'ipf_proto_status' and 'ipf_status', but more
|
* sync with 'ipf_proto_status' and 'ipf_status', but more
|
||||||
@ -562,6 +563,10 @@ struct dpif_class {
|
|||||||
uint16_t dl_type, uint8_t nw_proto,
|
uint16_t dl_type, uint8_t nw_proto,
|
||||||
char **tp_name, bool *is_generic);
|
char **tp_name, bool *is_generic);
|
||||||
|
|
||||||
|
/* Stores the conntrack features supported by 'dpif' into 'features'.
|
||||||
|
* The value is a bitmap of CONNTRACK_F_* bits. */
|
||||||
|
int (*ct_get_features)(struct dpif *, enum ct_features *features);
|
||||||
|
|
||||||
/* IP Fragmentation. */
|
/* IP Fragmentation. */
|
||||||
|
|
||||||
/* Disables or enables conntrack fragment reassembly. The default
|
/* Disables or enables conntrack fragment reassembly. The default
|
||||||
|
@ -2132,6 +2132,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
|
|||||||
connection, will behave the same as a bare <code>nat</code>.
|
connection, will behave the same as a bare <code>nat</code>.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
For SNAT, there is a special case when the <code>src</code> IP
|
||||||
|
address is configured as all 0's, i.e.,
|
||||||
|
<code>nat(src=0.0.0.0)</code>. In this case, when a source port
|
||||||
|
collision is detected during the commit, the source port will be
|
||||||
|
translated to an ephemeral port. If there is no collision, no SNAT
|
||||||
|
is performed. Note that this is currently only implemented in the
|
||||||
|
Linux kernel datapath.
|
||||||
|
</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
Open vSwitch 2.6 introduced <code>nat</code>. Linux 4.6 was the
|
Open vSwitch 2.6 introduced <code>nat</code>. Linux 4.6 was the
|
||||||
earliest upstream kernel that implemented <code>ct</code> support for
|
earliest upstream kernel that implemented <code>ct</code> support for
|
||||||
|
@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer)
|
|||||||
return !error;
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */
|
||||||
|
static bool
|
||||||
|
dpif_supports_ct_zero_snat(struct dpif_backer *backer)
|
||||||
|
{
|
||||||
|
enum ct_features features;
|
||||||
|
bool supported = false;
|
||||||
|
|
||||||
|
if (!ct_dpif_get_features(backer->dpif, &features)) {
|
||||||
|
if (features & CONNTRACK_F_ZERO_SNAT) {
|
||||||
|
supported = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VLOG_INFO("%s: Datapath %s ct_zero_snat",
|
||||||
|
dpif_name(backer->dpif), (supported) ? "supports"
|
||||||
|
: "does not support");
|
||||||
|
return supported;
|
||||||
|
}
|
||||||
|
|
||||||
/* Tests whether 'backer''s datapath supports the
|
/* Tests whether 'backer''s datapath supports the
|
||||||
* OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
|
* OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
|
||||||
static bool
|
static bool
|
||||||
@ -1590,6 +1608,7 @@ check_support(struct dpif_backer *backer)
|
|||||||
dpif_supports_explicit_drop_action(backer->dpif);
|
dpif_supports_explicit_drop_action(backer->dpif);
|
||||||
backer->rt_support.lb_output_action =
|
backer->rt_support.lb_output_action =
|
||||||
dpif_supports_lb_output_action(backer->dpif);
|
dpif_supports_lb_output_action(backer->dpif);
|
||||||
|
backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer);
|
||||||
|
|
||||||
/* Flow fields. */
|
/* Flow fields. */
|
||||||
backer->rt_support.odp.ct_state = check_ct_state(backer);
|
backer->rt_support.odp.ct_state = check_ct_state(backer);
|
||||||
@ -5605,6 +5624,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap)
|
|||||||
smap_add(cap, "explicit_drop_action",
|
smap_add(cap, "explicit_drop_action",
|
||||||
s.explicit_drop_action ? "true" :"false");
|
s.explicit_drop_action ? "true" :"false");
|
||||||
smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
|
smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
|
||||||
|
smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
|
/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
|
||||||
|
@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
|
|||||||
DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \
|
DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \
|
||||||
\
|
\
|
||||||
/* True if the datapath supports balance_tcp optimization */ \
|
/* True if the datapath supports balance_tcp optimization */ \
|
||||||
DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")
|
DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\
|
||||||
|
\
|
||||||
|
/* True if the datapath supports all-zero IP SNAT. */ \
|
||||||
|
DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT")
|
||||||
|
|
||||||
|
|
||||||
/* Stores the various features which the corresponding backer supports. */
|
/* Stores the various features which the corresponding backer supports. */
|
||||||
|
@ -99,6 +99,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
|
|||||||
#
|
#
|
||||||
m4_define([CHECK_CONNTRACK_NAT])
|
m4_define([CHECK_CONNTRACK_NAT])
|
||||||
|
|
||||||
|
# CHECK_CONNTRACK_ZEROIP_SNAT()
|
||||||
|
#
|
||||||
|
# Perform requirements checks for running conntrack all-zero IP SNAT tests.
|
||||||
|
# The kernel always supports all-zero IP SNAT, so no check is needed.
|
||||||
|
# However, the Windows datapath using the same netlink interface does not.
# The macro therefore skips the calling test when $IS_WIN32 is "yes".
|
||||||
|
#
|
||||||
|
m4_define([CHECK_CONNTRACK_ZEROIP_SNAT],
|
||||||
|
[
|
||||||
|
AT_SKIP_IF([test "$IS_WIN32" = "yes"])
|
||||||
|
])
|
||||||
|
|
||||||
# CHECK_CONNTRACK_TIMEOUT()
|
# CHECK_CONNTRACK_TIMEOUT()
|
||||||
#
|
#
|
||||||
# Perform requirements checks for running conntrack customized timeout tests.
|
# Perform requirements checks for running conntrack customized timeout tests.
|
||||||
|
@ -4487,6 +4487,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
|
|||||||
OVS_TRAFFIC_VSWITCHD_STOP
|
OVS_TRAFFIC_VSWITCHD_STOP
|
||||||
AT_CLEANUP
|
AT_CLEANUP
|
||||||
|
|
||||||
|
|
||||||
|
dnl Checks the all-zero IP SNAT behavior of nat(src=0.0.0.0): the source port
dnl is left alone when the connection does not collide with an existing one,
dnl and is rewritten when it does.
AT_SETUP([conntrack - all-zero IP SNAT])
|
||||||
|
dnl The test drives traffic with nc, so skip it when nc is not available.
AT_SKIP_IF([test $HAVE_NC = no])
|
||||||
|
CHECK_CONNTRACK()
|
||||||
|
CHECK_CONNTRACK_ZEROIP_SNAT()
|
||||||
|
OVS_TRAFFIC_VSWITCHD_START()
|
||||||
|
|
||||||
|
dnl Two namespaces attached to br0: at_ns0 owns 10.1.1.1 and at_ns1 owns
dnl 10.1.1.2.  at_ns0 routes 172.1.1.0/24 via 10.1.1.2.
ADD_NAMESPACES(at_ns0, at_ns1)
|
||||||
|
ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
|
||||||
|
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
|
||||||
|
NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
|
||||||
|
|
||||||
|
dnl Start the 'http' L7 service in at_ns1; the nc clients below connect to
dnl port 80.
OVS_START_L7([at_ns1], [http])
|
||||||
|
|
||||||
|
dnl Flow table: non-reply IP traffic to 10.1.1.0/24 is committed with
dnl nat(src=0.0.0.0), while traffic to 172.1.1.2 is committed with a DNAT to
dnl 10.1.1.2.  Tables 10 and 20 apply NAT to replies and forward to port 1 or 2
dnl based on the destination address.
AT_DATA([flows.txt], [dnl
|
||||||
|
table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
|
||||||
|
table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
|
||||||
|
table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10)
|
||||||
|
table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
|
||||||
|
table=0,priority=10,arp,action=normal
|
||||||
|
table=0,priority=1,action=drop
|
||||||
|
table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat)
|
||||||
|
table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
|
||||||
|
table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
|
||||||
|
table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
|
||||||
|
])
|
||||||
|
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
|
||||||
|
|
||||||
|
dnl - Test to make sure src nat is NOT done when not needed
dnl   The reply tuple of the single connection must still carry port 30000.
|
||||||
|
NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log])
|
||||||
|
AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl
|
||||||
|
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
|
||||||
|
])
|
||||||
|
|
||||||
|
dnl - Test to make sure src nat is done when needed
dnl   Both connections use client port 30001 and end up at 10.1.1.2:80 (the
dnl   first one through the DNAT rule).  The sed expressions mask port 30001 as
dnl   <clnt_s_port> and any remaining numeric reply dport as <rnd_port>, so the
dnl   expected output requires the directly addressed connection to have been
dnl   given a different port in its reply tuple.
|
||||||
|
NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log])
|
||||||
|
NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log])
|
||||||
|
AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], [0], [dnl
|
||||||
|
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
|
||||||
|
tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
|
||||||
|
])
|
||||||
|
|
||||||
|
OVS_TRAFFIC_VSWITCHD_STOP
|
||||||
|
AT_CLEANUP
|
||||||
|
|
||||||
|
|
||||||
AT_SETUP([conntrack - simple DNAT])
|
AT_SETUP([conntrack - simple DNAT])
|
||||||
CHECK_CONNTRACK()
|
CHECK_CONNTRACK()
|
||||||
CHECK_CONNTRACK_NAT()
|
CHECK_CONNTRACK_NAT()
|
||||||
|
@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
|
|||||||
#
|
#
|
||||||
m4_define([CHECK_CONNTRACK_NAT])
|
m4_define([CHECK_CONNTRACK_NAT])
|
||||||
|
|
||||||
|
# CHECK_CONNTRACK_ZEROIP_SNAT()
|
||||||
|
#
|
||||||
|
# Perform requirements checks for running conntrack all-zero IP SNAT tests.
|
||||||
|
# The userspace datapath does not support all-zero IP SNAT.
# The macro therefore skips the calling test unconditionally (':' always
# succeeds).
|
||||||
|
#
|
||||||
|
m4_define([CHECK_CONNTRACK_ZEROIP_SNAT],
|
||||||
|
[
|
||||||
|
AT_SKIP_IF([:])
|
||||||
|
])
|
||||||
|
|
||||||
# CHECK_CONNTRACK_TIMEOUT()
|
# CHECK_CONNTRACK_TIMEOUT()
|
||||||
#
|
#
|
||||||
# Perform requirements checks for running conntrack customized timeout tests.
|
# Perform requirements checks for running conntrack customized timeout tests.
|
||||||
|
@ -6181,6 +6181,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
|
|||||||
True if the datapath supports OVS_ACTION_ATTR_DROP. If false,
|
True if the datapath supports OVS_ACTION_ATTR_DROP. If false,
|
||||||
explicit drop action will not be sent to the datapath.
|
explicit drop action will not be sent to the datapath.
|
||||||
</column>
|
</column>
|
||||||
|
<column name="capabilities" key="ct_zero_snat"
|
||||||
|
type='{"type": "boolean"}'>
|
||||||
|
True if the datapath supports all-zero SNAT. This is a special case
|
||||||
|
if the <code>src</code> IP address is configured as all 0's, i.e.,
|
||||||
|
<code>nat(src=0.0.0.0)</code>. In this case, when a source port
|
||||||
|
collision is detected during the commit, the source port will be
|
||||||
|
translated to an ephemeral port. If there is no collision, no SNAT
|
||||||
|
is performed.
|
||||||
|
</column>
|
||||||
</group>
|
</group>
|
||||||
|
|
||||||
<group title="Common Columns">
|
<group title="Common Columns">
|
||||||
|
Loading…
x
Reference in New Issue
Block a user