2015-11-15 22:07:25 -08:00
|
|
|
|
/*
|
2019-02-13 15:34:21 -08:00
|
|
|
|
* Copyright (c) 2015, 2016, 2017, 2019 Nicira, Inc.
|
2015-11-15 22:07:25 -08:00
|
|
|
|
*
|
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
|
*
|
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
*
|
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef CONNTRACK_H
|
|
|
|
|
|
#define CONNTRACK_H 1
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
2016-05-16 12:59:23 -07:00
|
|
|
|
#include "latch.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
|
#include "odp-netlink.h"
|
|
|
|
|
|
#include "openvswitch/hmap.h"
|
2016-05-16 12:59:23 -07:00
|
|
|
|
#include "openvswitch/list.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
|
#include "openvswitch/thread.h"
|
|
|
|
|
|
#include "openvswitch/types.h"
|
|
|
|
|
|
#include "ovs-atomic.h"
|
2017-05-30 10:49:25 -07:00
|
|
|
|
#include "ovs-thread.h"
|
|
|
|
|
|
#include "packets.h"
|
2018-01-09 15:44:54 -08:00
|
|
|
|
#include "hindex.h"
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
/* Userspace connection tracker
|
|
|
|
|
|
* ============================
|
|
|
|
|
|
*
|
|
|
|
|
|
* This is a connection tracking module that keeps all the state in userspace.
|
|
|
|
|
|
*
|
|
|
|
|
|
* Usage
|
|
|
|
|
|
* =====
|
|
|
|
|
|
*
|
|
|
|
|
|
* struct conntrack ct;
|
|
|
|
|
|
*
|
|
|
|
|
|
* Initialization:
|
|
|
|
|
|
*
|
|
|
|
|
|
* conntrack_init(&ct);
|
|
|
|
|
|
*
|
|
|
|
|
|
* It is necessary to periodically issue a call to
|
|
|
|
|
|
*
|
|
|
|
|
|
* conntrack_run(&ct);
|
|
|
|
|
|
*
|
|
|
|
|
|
* to allow the module to clean up expired connections.
|
|
|
|
|
|
*
|
|
|
|
|
|
* To send a group of packets through the connection tracker:
|
|
|
|
|
|
*
|
|
|
|
|
|
* conntrack_execute(&ct, pkts, n_pkts, ...);
|
|
|
|
|
|
*
|
|
|
|
|
|
* Thread-safety
|
|
|
|
|
|
* =============
|
|
|
|
|
|
*
|
|
|
|
|
|
* conntrack_execute() can be called by multiple threads simultaneously.
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
struct dp_packet_batch;
|
|
|
|
|
|
|
|
|
|
|
|
struct conntrack;
|
|
|
|
|
|
|
2019-02-14 13:15:08 -08:00
|
|
|
|
union ct_addr {
|
|
|
|
|
|
ovs_be32 ipv4;
|
|
|
|
|
|
struct in6_addr ipv6;
|
2017-05-30 10:49:25 -07:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* NAT action flags.  Every enumerator is a distinct single bit, so several of
 * them can be OR'd together into one integer.
 *
 * NOTE(review): the names suggest source/destination address and port
 * translation respectively; confirm the exact meaning in conntrack.c. */
enum nat_action_e {
    NAT_ACTION_SRC = 1 << 0,        /* Bit 0. */
    NAT_ACTION_SRC_PORT = 1 << 1,   /* Bit 1. */
    NAT_ACTION_DST = 1 << 2,        /* Bit 2. */
    NAT_ACTION_DST_PORT = 1 << 3,   /* Bit 3. */
};
|
|
|
|
|
|
|
|
|
|
|
|
struct nat_action_info_t {
|
2019-02-14 13:15:08 -08:00
|
|
|
|
union ct_addr min_addr;
|
|
|
|
|
|
union ct_addr max_addr;
|
2017-05-30 10:49:25 -07:00
|
|
|
|
uint16_t min_port;
|
|
|
|
|
|
uint16_t max_port;
|
|
|
|
|
|
uint16_t nat_action;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
void conntrack_init(struct conntrack *);
|
|
|
|
|
|
void conntrack_destroy(struct conntrack *);
|
|
|
|
|
|
|
2017-12-04 08:13:06 -08:00
|
|
|
|
/* Sends the group of packets in 'pkt_batch' through the connection tracker
 * 'ct'.  May be called by multiple threads simultaneously.
 *
 * NOTE(review): the semantics below are read off the parameter names and
 * types only; confirm them against the definition in conntrack.c.
 *
 *   - 'dl_type': Ethernet type of the packets.
 *   - 'force', 'commit', 'zone': conntrack action modifiers.
 *   - 'setmark', 'setlabel': optional mark/label data (pointers, so
 *     presumably may be NULL when unused).
 *   - 'tp_src', 'tp_dst': transport-layer ports.
 *   - 'helper': name of a helper, if any.
 *   - 'nat_action_info': NAT parameters, if any.
 *   - 'now': current time as seen by the caller.
 *
 * Returns an int status; the header does not state its meaning. */
int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                      ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
                      const uint32_t *setmark,
                      const struct ovs_key_ct_labels *setlabel,
                      ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                      const struct nat_action_info_t *nat_action_info,
                      long long now);
|
2018-01-19 14:21:51 -05:00
|
|
|
|
void conntrack_clear(struct dp_packet *packet);
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
struct conntrack_dump {
|
|
|
|
|
|
struct conntrack *ct;
|
|
|
|
|
|
unsigned bucket;
|
|
|
|
|
|
struct hmap_position bucket_pos;
|
|
|
|
|
|
bool filter_zone;
|
|
|
|
|
|
uint16_t zone;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
struct ct_dpif_entry;
|
2018-02-12 14:02:37 -08:00
|
|
|
|
struct ct_dpif_tuple;
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
/* Begins a dump of a connection tracker into the given 'struct
 * conntrack_dump'.
 *
 * NOTE(review): 'pzone' is a pointer, so presumably NULL means "do not
 * filter by zone" (see 'filter_zone' in struct conntrack_dump); the purpose
 * of the trailing 'int *' output is not visible in this header.  Confirm
 * both against conntrack.c. */
int conntrack_dump_start(struct conntrack *, struct conntrack_dump *,
                         const uint16_t *pzone, int *);
|
2015-11-15 22:07:25 -08:00
|
|
|
|
int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *);
|
|
|
|
|
|
int conntrack_dump_done(struct conntrack_dump *);
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
int conntrack_flush(struct conntrack *, const uint16_t *zone);
|
2018-02-12 14:02:37 -08:00
|
|
|
|
/* Flush operation keyed on a 'struct ct_dpif_tuple' and a zone.
 *
 * NOTE(review): presumably removes the connection matching the tuple in
 * 'zone'; the return value's meaning is not visible in this header.  Confirm
 * against conntrack.c. */
int conntrack_flush_tuple(struct conntrack *, const struct ct_dpif_tuple *,
                          uint16_t zone);
|
2018-01-08 15:18:42 -08:00
|
|
|
|
int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns);
|
|
|
|
|
|
int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns);
|
2018-01-08 15:18:43 -08:00
|
|
|
|
int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);
|
2019-02-13 15:34:21 -08:00
|
|
|
|
struct ipf *conntrack_ipf_ctx(struct conntrack *ct);
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
/* 'struct ct_lock' is a wrapper for an adaptive mutex. It's useful to try
|
|
|
|
|
|
* different types of locks (e.g. spinlocks) */
|
|
|
|
|
|
|
|
|
|
|
|
struct OVS_LOCKABLE ct_lock {
|
|
|
|
|
|
struct ovs_mutex lock;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2017-05-30 10:49:27 -07:00
|
|
|
|
/* 'struct ct_rwlock' wraps a 'struct ovs_rwlock', in the same way that
 * 'struct ct_lock' wraps a mutex.  Use the ct_rwlock_*() helpers to operate
 * on it. */
struct OVS_LOCKABLE ct_rwlock {
    struct ovs_rwlock lock;     /* The wrapped read-write lock. */
};
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
static inline void ct_lock_init(struct ct_lock *lock)
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_mutex_init_adaptive(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_lock_lock(struct ct_lock *lock)
|
|
|
|
|
|
OVS_ACQUIRES(lock)
|
|
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_mutex_lock(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_lock_unlock(struct ct_lock *lock)
|
|
|
|
|
|
OVS_RELEASES(lock)
|
|
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_mutex_unlock(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_lock_destroy(struct ct_lock *lock)
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_mutex_destroy(&lock->lock);
|
|
|
|
|
|
}
|
2017-05-30 10:49:27 -07:00
|
|
|
|
|
|
|
|
|
|
static inline void ct_rwlock_init(struct ct_rwlock *lock)
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_rwlock_init(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
|
|
|
|
|
|
OVS_ACQ_WRLOCK(lock)
|
|
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_rwlock_wrlock(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
|
|
|
|
|
|
OVS_ACQ_RDLOCK(lock)
|
|
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_rwlock_rdlock(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
|
|
|
|
|
|
OVS_RELEASES(lock)
|
|
|
|
|
|
OVS_NO_THREAD_SAFETY_ANALYSIS
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_rwlock_unlock(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
|
|
|
|
|
|
{
|
|
|
|
|
|
ovs_rwlock_destroy(&lock->lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
|
|
|
|
|
/* Timeouts: all the possible timeout states passed to update_expiration()
 * are listed here.  The name will be prefixed by CT_TM_ and the value is in
 * milliseconds.
 *
 * This is an X-macro list: define CT_TIMEOUT(NAME, VAL) to whatever each
 * entry should expand to, expand CT_TIMEOUTS, then #undef CT_TIMEOUT. */
#define CT_TIMEOUTS \
    CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
    CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
    CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
    CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
    CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
    CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
    CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
    CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
    CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
|
2015-11-15 22:07:25 -08:00
|
|
|
|
|
2016-05-16 12:59:23 -07:00
|
|
|
|
/* The smallest of the above values: it is used as an upper bound for the
|
|
|
|
|
|
* interval between two rounds of cleanup of expired entries */
|
|
|
|
|
|
#define CT_TM_MIN (30 * 1000)
|
|
|
|
|
|
|
|
|
|
|
|
#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
|
|
|
|
|
|
CT_TIMEOUTS
|
|
|
|
|
|
#undef CT_TIMEOUT
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
enum ct_timeout {
|
|
|
|
|
|
#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
|
|
|
|
|
|
CT_TIMEOUTS
|
|
|
|
|
|
#undef CT_TIMEOUT
|
|
|
|
|
|
N_CT_TM
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Locking:
|
|
|
|
|
|
*
|
|
|
|
|
|
* The connections are kept in different buckets, which are completely
|
|
|
|
|
|
* independent. The connection bucket is determined by the hash of its key.
|
2016-05-16 12:59:23 -07:00
|
|
|
|
*
|
|
|
|
|
|
* Each bucket has two locks. Acquisition order is, from outermost to
|
|
|
|
|
|
* innermost:
|
|
|
|
|
|
*
|
|
|
|
|
|
* cleanup_mutex
|
|
|
|
|
|
* lock
|
|
|
|
|
|
*
|
2015-11-15 22:07:25 -08:00
|
|
|
|
* */
|
|
|
|
|
|
struct conntrack_bucket {
|
2016-05-16 12:59:23 -07:00
|
|
|
|
/* Protects 'connections' and 'exp_lists'. Used in the fast path */
|
2015-11-15 22:07:25 -08:00
|
|
|
|
struct ct_lock lock;
|
2016-05-16 12:59:23 -07:00
|
|
|
|
/* Contains the connections in the bucket, indexed by 'struct conn_key' */
|
2015-11-15 22:07:25 -08:00
|
|
|
|
struct hmap connections OVS_GUARDED;
|
2016-05-16 12:59:23 -07:00
|
|
|
|
/* For each possible timeout we have a list of connections. When the
|
|
|
|
|
|
* timeout of a connection is updated, we move it to the back of the list.
|
|
|
|
|
|
* Since the connection in a list have the same relative timeout, the list
|
|
|
|
|
|
* will be ordered, with the oldest connections to the front. */
|
|
|
|
|
|
struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
|
|
|
|
|
|
|
|
|
|
|
|
/* Protects 'next_cleanup'. Used to make sure that there's only one thread
|
|
|
|
|
|
* performing the cleanup. */
|
|
|
|
|
|
struct ovs_mutex cleanup_mutex;
|
|
|
|
|
|
long long next_cleanup OVS_GUARDED;
|
2015-11-15 22:07:25 -08:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Number of independent connection buckets, expressed as a power of two:
 * CONNTRACK_BUCKETS == 2 ** CONNTRACK_BUCKETS_SHIFT. */
#define CONNTRACK_BUCKETS_SHIFT 8
#define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)
|
|
|
|
|
|
|
|
|
|
|
|
struct conntrack {
|
|
|
|
|
|
/* Independent buckets containing the connections */
|
|
|
|
|
|
struct conntrack_bucket buckets[CONNTRACK_BUCKETS];
|
|
|
|
|
|
|
|
|
|
|
|
/* Salt for hashing a connection key. */
|
|
|
|
|
|
uint32_t hash_basis;
|
|
|
|
|
|
|
2016-05-16 12:59:23 -07:00
|
|
|
|
/* The thread performing periodic cleanup of the connection
|
|
|
|
|
|
* tracker */
|
|
|
|
|
|
pthread_t clean_thread;
|
|
|
|
|
|
/* Latch to destroy the 'clean_thread' */
|
|
|
|
|
|
struct latch clean_thread_exit;
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
/* Number of connections currently in the connection tracker. */
|
|
|
|
|
|
atomic_count n_conn;
|
|
|
|
|
|
/* Connections limit. When this limit is reached, no new connection
|
|
|
|
|
|
* will be accepted. */
|
|
|
|
|
|
atomic_uint n_conn_limit;
|
2017-05-30 10:49:27 -07:00
|
|
|
|
|
|
|
|
|
|
/* The following resources are referenced during nat connection
|
|
|
|
|
|
* creation and deletion. */
|
|
|
|
|
|
struct hmap nat_conn_keys OVS_GUARDED;
|
2017-08-06 10:51:14 -07:00
|
|
|
|
/* Hash table for alg expectations. Expectations are created
|
|
|
|
|
|
* by control connections to help create data connections. */
|
|
|
|
|
|
struct hmap alg_expectations OVS_GUARDED;
|
2018-01-09 15:44:54 -08:00
|
|
|
|
/* Used to lookup alg expectations from the control context. */
|
|
|
|
|
|
struct hindex alg_expectation_refs OVS_GUARDED;
|
2017-08-06 10:51:14 -07:00
|
|
|
|
/* Expiry list for alg expectations. */
|
|
|
|
|
|
struct ovs_list alg_exp_list OVS_GUARDED;
|
2017-05-30 10:49:27 -07:00
|
|
|
|
/* This lock is used during NAT connection creation and deletion;
|
|
|
|
|
|
* it is taken after a bucket lock and given back before that
|
|
|
|
|
|
* bucket unlock.
|
2017-08-06 10:51:14 -07:00
|
|
|
|
* This lock is similarly used to guard alg_expectations and
|
2018-01-09 15:44:54 -08:00
|
|
|
|
* alg_expectation_refs. If a bucket lock is also held during
|
|
|
|
|
|
* the normal code flow, then is must be taken first and released
|
|
|
|
|
|
* last.
|
2017-05-30 10:49:27 -07:00
|
|
|
|
*/
|
2017-07-05 21:32:20 -07:00
|
|
|
|
struct ct_rwlock resources_lock;
|
2017-05-30 10:49:27 -07:00
|
|
|
|
|
2019-02-13 15:34:21 -08:00
|
|
|
|
/* Fragmentation handling context. */
|
|
|
|
|
|
struct ipf *ipf;
|
|
|
|
|
|
|
2015-11-15 22:07:25 -08:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* conntrack.h */
|