2015-11-15 22:07:25 -08:00
|
|
|
/*
|
2019-05-09 08:15:07 -07:00
|
|
|
* Copyright (c) 2015-2019 Nicira, Inc.
|
2015-11-15 22:07:25 -08:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef CONNTRACK_PRIVATE_H
|
|
|
|
#define CONNTRACK_PRIVATE_H 1
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
|
2019-05-09 08:15:07 -07:00
|
|
|
#include "cmap.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
#include "conntrack.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
#include "ct-dpif.h"
|
2019-05-09 08:15:07 -07:00
|
|
|
#include "ipf.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
#include "openvswitch/hmap.h"
|
|
|
|
#include "openvswitch/list.h"
|
|
|
|
#include "openvswitch/types.h"
|
|
|
|
#include "packets.h"
|
2022-07-11 18:55:08 +02:00
|
|
|
#include "rculist.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
#include "unaligned.h"
|
2017-05-30 14:21:33 -07:00
|
|
|
#include "dp-packet.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
struct ct_endpoint {
|
2019-02-14 13:15:08 -08:00
|
|
|
union ct_addr addr;
|
2016-05-13 15:04:17 -07:00
|
|
|
union {
|
|
|
|
ovs_be16 port;
|
|
|
|
struct {
|
|
|
|
ovs_be16 icmp_id;
|
|
|
|
uint8_t icmp_type;
|
|
|
|
uint8_t icmp_code;
|
|
|
|
};
|
|
|
|
};
|
2015-11-15 22:07:25 -08:00
|
|
|
};
|
|
|
|
|
2017-06-09 15:30:43 -07:00
|
|
|
/* Verify that there is no padding in struct ct_endpoint, to facilitate
|
|
|
|
* hashing in ct_endpoint_hash_add(). */
|
2019-02-14 13:15:08 -08:00
|
|
|
BUILD_ASSERT_DECL(sizeof(struct ct_endpoint) == sizeof(union ct_addr) + 4);
|
2017-06-09 15:30:43 -07:00
|
|
|
|
2023-08-30 21:29:51 +02:00
|
|
|
/* Direction tag stored in struct conn_key_node.  CT_DIRS is the number of
 * directions and sizes the 'key_node' array in struct conn. */
enum key_dir {
    CT_DIR_FWD = 0,
    CT_DIR_REV,
    CT_DIRS,
};
|
|
|
|
|
2017-08-06 10:51:14 -07:00
|
|
|
/* Changes to this structure need to be reflected in conn_key_hash()
|
|
|
|
* and conn_key_cmp(). */
|
2015-11-15 22:07:25 -08:00
|
|
|
struct conn_key {
|
|
|
|
struct ct_endpoint src;
|
|
|
|
struct ct_endpoint dst;
|
|
|
|
|
|
|
|
ovs_be16 dl_type;
|
|
|
|
uint16_t zone;
|
2017-08-06 10:51:14 -07:00
|
|
|
uint8_t nw_proto;
|
2015-11-15 22:07:25 -08:00
|
|
|
};
|
|
|
|
|
2020-10-12 07:38:38 -04:00
|
|
|
/* Verify that nw_proto stays uint8_t as it's used to index into l4_protos[] */
|
|
|
|
BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, nw_proto) == sizeof(uint8_t));
|
|
|
|
|
2017-08-06 10:51:14 -07:00
|
|
|
/* This is used for alg expectations; an expectation is a
|
|
|
|
* context created in preparation for establishing a data
|
|
|
|
* connection. The expectation is created by the control
|
|
|
|
* connection. */
|
|
|
|
struct alg_exp_node {
|
2018-01-09 15:44:54 -08:00
|
|
|
/* Node in alg_expectations. */
|
2017-08-06 10:51:14 -07:00
|
|
|
struct hmap_node node;
|
2018-01-09 15:44:54 -08:00
|
|
|
/* Node in alg_expectation_refs. */
|
|
|
|
struct hindex_node node_ref;
|
2017-08-06 10:51:14 -07:00
|
|
|
/* Key of data connection to be created. */
|
|
|
|
struct conn_key key;
|
|
|
|
/* Corresponding key of the control connection. */
|
2020-06-17 15:31:09 -07:00
|
|
|
struct conn_key parent_key;
|
2017-08-06 10:51:14 -07:00
|
|
|
/* The NAT replacement address to be used by the data connection. */
|
2019-02-14 13:15:08 -08:00
|
|
|
union ct_addr alg_nat_repl_addr;
|
2020-06-17 15:31:09 -07:00
|
|
|
/* The data connection inherits the parent control
|
2017-08-06 10:51:14 -07:00
|
|
|
* connection label and mark. */
|
2020-06-17 15:31:09 -07:00
|
|
|
ovs_u128 parent_label;
|
|
|
|
uint32_t parent_mark;
|
2018-01-09 15:44:55 -08:00
|
|
|
/* True if for NAT application, the alg replaces the dest address;
|
|
|
|
* otherwise, the source address is replaced. */
|
|
|
|
bool nat_rpl_dst;
|
2017-08-06 10:51:14 -07:00
|
|
|
};
|
|
|
|
|
2022-07-11 18:55:08 +02:00
|
|
|
/* Timeouts: all the possible timeout states passed to update_expiration()
 * are listed here.  The name will be prefixed by CT_TM_ and the value is in
 * milliseconds.
 *
 * This is an X-macro list: define CT_TIMEOUT(NAME) before expanding
 * CT_TIMEOUTS and undefine it afterwards, as enum ct_timeout does. */
#define CT_TIMEOUTS \
    CT_TIMEOUT(TCP_FIRST_PACKET) \
    CT_TIMEOUT(TCP_OPENING) \
    CT_TIMEOUT(TCP_ESTABLISHED) \
    CT_TIMEOUT(TCP_CLOSING) \
    CT_TIMEOUT(TCP_FIN_WAIT) \
    CT_TIMEOUT(TCP_CLOSED) \
    CT_TIMEOUT(OTHER_FIRST) \
    CT_TIMEOUT(OTHER_MULTIPLE) \
    CT_TIMEOUT(OTHER_BIDIR) \
    CT_TIMEOUT(ICMP_FIRST) \
    CT_TIMEOUT(ICMP_REPLY)

/* One CT_TM_<NAME> enumerator per CT_TIMEOUTS entry, in list order starting
 * at 0, followed by N_CT_TM, which therefore equals the number of entries. */
enum ct_timeout {
#define CT_TIMEOUT(NAME) CT_TM_##NAME,
    CT_TIMEOUTS
#undef CT_TIMEOUT
    N_CT_TM
};
|
|
|
|
|
|
|
|
/* Number of elements in the 'exp_lists' array of struct conntrack. */
#define N_EXP_LISTS 100
|
|
|
|
|
2023-08-30 21:29:51 +02:00
|
|
|
struct conn_key_node {
|
|
|
|
enum key_dir dir;
|
|
|
|
struct conn_key key;
|
|
|
|
struct cmap_node cm_node;
|
2019-05-09 08:15:07 -07:00
|
|
|
};
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
struct conn {
|
2019-05-09 08:15:07 -07:00
|
|
|
/* Immutable data. */
|
2023-08-30 21:29:51 +02:00
|
|
|
struct conn_key_node key_node[CT_DIRS];
|
2020-06-17 15:31:09 -07:00
|
|
|
struct conn_key parent_key; /* Only used for orig_tuple support. */
|
2021-08-11 11:45:12 +08:00
|
|
|
uint16_t nat_action;
|
2017-08-06 10:51:14 -07:00
|
|
|
char *alg;
|
2022-07-11 18:55:08 +02:00
|
|
|
atomic_flag reclaimed; /* False during the lifetime of the connection,
|
|
|
|
* True as soon as a thread has started freeing
|
|
|
|
* its memory. */
|
|
|
|
|
|
|
|
/* Inserted once by a PMD, then managed by the 'ct_clean' thread. */
|
|
|
|
struct rculist node;
|
2019-05-09 08:15:07 -07:00
|
|
|
|
|
|
|
/* Mutable data. */
|
|
|
|
struct ovs_mutex lock; /* Guards all mutable fields. */
|
|
|
|
ovs_u128 label;
|
2022-07-11 18:55:15 +02:00
|
|
|
atomic_llong expiration;
|
2019-05-28 11:14:42 -07:00
|
|
|
uint32_t mark;
|
2019-05-09 08:15:07 -07:00
|
|
|
int seq_skew;
|
2019-12-03 09:14:17 -08:00
|
|
|
|
|
|
|
/* Immutable data. */
|
|
|
|
int32_t admit_zone; /* The zone for managing zone limit counts. */
|
|
|
|
uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
|
|
|
|
|
|
|
|
/* Mutable data. */
|
2019-05-09 08:15:07 -07:00
|
|
|
bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP
|
|
|
|
* control messages; true if reply direction. */
|
|
|
|
|
|
|
|
/* Immutable data. */
|
|
|
|
bool alg_related; /* True if alg data connection. */
|
2020-04-29 12:25:11 -07:00
|
|
|
|
|
|
|
uint32_t tp_id; /* Timeout policy ID. */
|
2015-11-15 22:07:25 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Result type of the 'conn_update' callback in struct ct_l4_proto. */
enum ct_update_res {
    CT_UPDATE_INVALID,
    CT_UPDATE_VALID,
    CT_UPDATE_NEW,
    CT_UPDATE_VALID_NEW,
};
|
|
|
|
|
conntrack: Handle SNAT with all-zero IP address.
This patch introduces for the userspace datapath the handling
of rules like the following:
ct(commit,nat(src=0.0.0.0),...)
The kernel datapath already handles this case, which is particularly
handy in scenarios like the following:
Given A: 10.1.1.1, B: 192.168.2.100, C: 10.1.1.2
A opens a connection toward B on port 80 selecting as source port 10000.
B's IP gets dnat'ed to C's IP (10.1.1.1:10000 -> 192.168.2.100:80).
This will result in:
tcp,orig=(src=10.1.1.1,dst=192.168.2.100,sport=10000,dport=80),
reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=10000),
protoinfo=(state=ESTABLISHED)
A now tries to establish another connection with C using source port
10000, this time using C's IP address (10.1.1.1:10000 -> 10.1.1.2:80).
This second connection, if processed by conntrack with no SNAT/DNAT
involved, collides with the reverse tuple of the first connection,
so the entry for this valid connection doesn't get created.
With this commit, adding a SNAT rule with 0.0.0.0 for
10.1.1.1:10000 -> 10.1.1.2:80 allows the conn entry to be created:
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=10000,dport=80),
reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=10001),
protoinfo=(state=ESTABLISHED)
tcp,orig=(src=10.1.1.1,dst=192.168.2.100,sport=10000,dport=80),
reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=10000),
protoinfo=(state=ESTABLISHED)
The issue exists even in the opposite case (with A trying to connect
to C using B's IP after establishing a direct connection from A to C).
This commit refactors the relevant function in a way that both of the
previously mentioned cases are handled as well.
Suggested-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
Acked-by: Gaetan Rivet <grive@u256.net>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-06 15:03:18 +02:00
|
|
|
/* Combined NAT action masks: each ORs a NAT_ACTION_* flag with its matching
 * NAT_ACTION_*_PORT flag. */
#define NAT_ACTION_SNAT_ALL (NAT_ACTION_SRC | NAT_ACTION_SRC_PORT)
#define NAT_ACTION_DNAT_ALL (NAT_ACTION_DST | NAT_ACTION_DST_PORT)
|
|
|
|
|
|
|
|
/* Inclusive bounds of the ephemeral port range used for NAT. */
enum ct_ephemeral_range {
    MIN_NAT_EPHEMERAL_PORT = 1024,
    MAX_NAT_EPHEMERAL_PORT = 65535
};
|
|
|
|
|
|
|
|
/* Nonzero if 'curr' lies within the inclusive range ['min', 'max'].
 * Every argument is parenthesized so that compound expressions (e.g.
 * 'a + b' or 'c ? x : y') are compared as a whole.  'curr' is evaluated
 * twice. */
#define IN_RANGE(curr, min, max) \
    ((curr) >= (min) && (curr) <= (max))

/* Assigns to the lvalue 'curr' the port that follows it: 'min' if 'curr' is
 * out of range or equal to 'max', 'curr' + 1 otherwise.  The arguments are
 * evaluated more than once. */
#define NEXT_PORT_IN_RANGE(curr, min, max) \
    ((curr) = (!IN_RANGE(curr, min, max) || (curr) == (max)) \
              ? (min) : (curr) + 1)

/* If the current port is out of range increase the attempts by
 * one so that in the worst case scenario the current out of
 * range port plus all the in-range ports get tested.
 * Note that curr can be an out of range port only in case of
 * source port (SNAT with port range unspecified or DNAT),
 * furthermore the source port in the packet has to be less than
 * MIN_NAT_EPHEMERAL_PORT. */
#define N_PORT_ATTEMPTS(curr, min, max) \
    ((!IN_RANGE(curr, min, max)) \
     ? ((max) - (min)) + 2 : ((max) - (min)) + 1)

/* Loose in-range check, the first curr port can be any port out of
 * the range.
 *
 * 'INAME' names the countdown variable declared by the loop; 'curr' is
 * advanced with NEXT_PORT_IN_RANGE() after each iteration.
 *
 * NOTE(review): the attempt count is stored in a uint16_t, so a count of
 * 65536 (e.g. 'min' 0 and 'max' 65535) would wrap to 0 and skip the loop --
 * confirm that callers never pass such a range. */
#define FOR_EACH_PORT_IN_RANGE__(curr, min, max, INAME) \
    for (uint16_t INAME = N_PORT_ATTEMPTS(curr, min, max); \
         INAME > 0; INAME--, NEXT_PORT_IN_RANGE(curr, min, max))

/* Same as FOR_EACH_PORT_IN_RANGE__() with a counter name generated from
 * __COUNTER__, so that each use gets a distinct counter variable. */
#define FOR_EACH_PORT_IN_RANGE(curr, min, max) \
    FOR_EACH_PORT_IN_RANGE__(curr, min, max, OVS_JOIN(idx, __COUNTER__))
|
|
|
|
|
2019-05-02 21:34:04 -07:00
|
|
|
struct conntrack {
|
2019-05-09 08:15:07 -07:00
|
|
|
struct ovs_mutex ct_lock; /* Protects 2 following fields. */
|
2024-04-24 14:44:54 +02:00
|
|
|
struct cmap conns[UINT16_MAX + 1] OVS_GUARDED;
|
2022-07-11 18:55:08 +02:00
|
|
|
struct rculist exp_lists[N_EXP_LISTS];
|
2022-07-11 18:54:55 +02:00
|
|
|
struct cmap zone_limits OVS_GUARDED;
|
2022-07-11 18:55:02 +02:00
|
|
|
struct cmap timeout_policies OVS_GUARDED;
|
2019-05-09 08:15:07 -07:00
|
|
|
uint32_t hash_basis; /* Salt for hashing a connection key. */
|
|
|
|
pthread_t clean_thread; /* Periodically cleans up connection tracker. */
|
|
|
|
struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
|
2022-07-11 18:55:08 +02:00
|
|
|
unsigned int next_list; /* Next list where the newly created connection
|
|
|
|
* gets inserted. */
|
|
|
|
unsigned int next_sweep; /* List from which the gc thread will resume
|
|
|
|
* the sweeping. */
|
2019-05-09 08:15:07 -07:00
|
|
|
|
|
|
|
/* Counting connections. */
|
|
|
|
atomic_count n_conn; /* Number of connections currently tracked. */
|
|
|
|
atomic_uint n_conn_limit; /* Max connections tracked. */
|
|
|
|
|
|
|
|
/* Expectations for application level gateways (created by control
|
|
|
|
* connections to help create data connections, e.g. for FTP). */
|
|
|
|
struct ovs_rwlock resources_lock; /* Protects fields below. */
|
|
|
|
struct hmap alg_expectations OVS_GUARDED; /* Holds struct
|
|
|
|
* alg_exp_nodes. */
|
|
|
|
struct hindex alg_expectation_refs OVS_GUARDED; /* For lookup from
|
|
|
|
* control context. */
|
2019-05-02 21:34:04 -07:00
|
|
|
|
2019-09-25 14:09:41 -07:00
|
|
|
struct ipf *ipf; /* Fragmentation handling context. */
|
2019-12-03 09:14:17 -08:00
|
|
|
uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
|
2019-09-25 14:09:41 -07:00
|
|
|
atomic_bool tcp_seq_chk; /* Check TCP sequence numbers. */
|
2023-04-06 12:10:22 +02:00
|
|
|
atomic_uint32_t sweep_ms; /* Next sweep interval. */
|
2019-05-02 21:34:04 -07:00
|
|
|
};
|
|
|
|
|
2019-05-09 08:15:07 -07:00
|
|
|
/* Lock acquisition order:
 *    1. 'conn->lock'
 *    2. 'ct_lock'
 *    3. 'resources_lock'
 */
|
|
|
|
|
|
|
|
/* Per-protocol struct ct_l4_proto instances; only declared here, so their
 * definitions live in other translation units. */
extern struct ct_l4_proto ct_proto_tcp;
extern struct ct_l4_proto ct_proto_other;
extern struct ct_l4_proto ct_proto_icmp4;
extern struct ct_l4_proto ct_proto_icmp6;
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
/* Table of callbacks for one L4 protocol (see the 'ct_proto_*' declarations
 * in this header).
 *
 * NOTE(review): the per-callback summaries below are inferred from the
 * signatures only -- confirm against the protocol implementations. */
struct ct_l4_proto {
    /* Returns a struct conn for 'pkt'; takes the current time 'now' and a
     * timeout policy id 'tp_id'. */
    struct conn *(*new_conn)(struct conntrack *ct, struct dp_packet *pkt,
                             long long now, uint32_t tp_id);

    /* Returns a bool verdict for 'pkt'. */
    bool (*valid_new)(struct dp_packet *pkt);

    /* Processes 'pkt' against 'conn' and reports the outcome as an
     * enum ct_update_res; 'reply' is a direction flag. */
    enum ct_update_res (*conn_update)(struct conntrack *ct, struct conn *conn,
                                      struct dp_packet *pkt, bool reply,
                                      long long now);

    /* Takes a read-only connection and a struct ct_dpif_protoinfo to
     * write to. */
    void (*conn_get_protoinfo)(const struct conn *,
                               struct ct_dpif_protoinfo *);
};
|
|
|
|
|
|
|
|
#endif /* conntrack-private.h */
|