mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 01:51:26 +00:00
datapath-windows: Add Connection Tracking Support
Enable support for Stateful Firewall in Hyper-V by adding a Connection Tracking module. The module has been ported over from the userspace implementation patch of a similar name. The current version of the module supports ct - zone, mark and label for TCP packets. Support for other packet formats will be added in subsequent patches. The conntrack-tcp module is adapted from FreeBSD's pf subsystem and hence the BSD license. It has been ported over to match OVS Hyper-V coding style. Signed-off-by: Sairam Venugopal <vsairam@vmware.com> Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com> Co-Authored-by: Daniele Di Proietto <diproiettod@vmware.com> Acked-by: Nithin Raju <nithin@vmware.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
parent
ce05810425
commit
792d377d83
5
NOTICE
5
NOTICE
@ -38,3 +38,8 @@ Copyright (c) 2008, 2009, 2010 Sten Spans <sten@blinkenlights.nl>
|
|||||||
Auto Attach implementation
|
Auto Attach implementation
|
||||||
Copyright (c) 2014, 2015 WindRiver, Inc
|
Copyright (c) 2014, 2015 WindRiver, Inc
|
||||||
Copyright (c) 2014, 2015 Avaya, Inc
|
Copyright (c) 2014, 2015 Avaya, Inc
|
||||||
|
|
||||||
|
TCP connection tracker from FreeBSD pf, BSD licensed
|
||||||
|
Copyright (c) 2001 Daniel Hartmeier
|
||||||
|
Copyright (c) 2002 - 2008 Henning Brauer
|
||||||
|
Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
|
||||||
|
@ -13,6 +13,9 @@ EXTRA_DIST += \
|
|||||||
datapath-windows/ovsext/Atomic.h \
|
datapath-windows/ovsext/Atomic.h \
|
||||||
datapath-windows/ovsext/BufferMgmt.c \
|
datapath-windows/ovsext/BufferMgmt.c \
|
||||||
datapath-windows/ovsext/BufferMgmt.h \
|
datapath-windows/ovsext/BufferMgmt.h \
|
||||||
|
datapath-windows/ovsext/Conntrack-tcp.c \
|
||||||
|
datapath-windows/ovsext/Conntrack.c \
|
||||||
|
datapath-windows/ovsext/Conntrack.h \
|
||||||
datapath-windows/ovsext/Datapath.c \
|
datapath-windows/ovsext/Datapath.c \
|
||||||
datapath-windows/ovsext/Datapath.h \
|
datapath-windows/ovsext/Datapath.h \
|
||||||
datapath-windows/ovsext/Debug.c \
|
datapath-windows/ovsext/Debug.c \
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "precomp.h"
|
#include "precomp.h"
|
||||||
|
|
||||||
#include "Actions.h"
|
#include "Actions.h"
|
||||||
|
#include "Conntrack.h"
|
||||||
#include "Debug.h"
|
#include "Debug.h"
|
||||||
#include "Event.h"
|
#include "Event.h"
|
||||||
#include "Flow.h"
|
#include "Flow.h"
|
||||||
@ -1786,6 +1787,28 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case OVS_ACTION_ATTR_CT:
|
||||||
|
{
|
||||||
|
if (ovsFwdCtx.destPortsSizeOut > 0
|
||||||
|
|| ovsFwdCtx.tunnelTxNic != NULL
|
||||||
|
|| ovsFwdCtx.tunnelRxNic != NULL) {
|
||||||
|
status = OvsOutputBeforeSetAction(&ovsFwdCtx);
|
||||||
|
if (status != NDIS_STATUS_SUCCESS) {
|
||||||
|
dropReason = L"OVS-adding destination failed";
|
||||||
|
goto dropit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
|
||||||
|
key, (const PNL_ATTR)a);
|
||||||
|
if (status != NDIS_STATUS_SUCCESS) {
|
||||||
|
OVS_LOG_ERROR("CT Action failed");
|
||||||
|
dropReason = L"OVS-conntrack action failed";
|
||||||
|
goto dropit;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case OVS_ACTION_ATTR_RECIRC:
|
case OVS_ACTION_ATTR_RECIRC:
|
||||||
{
|
{
|
||||||
if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
|
if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
|
||||||
|
532
datapath-windows/ovsext/Conntrack-tcp.c
Normal file
532
datapath-windows/ovsext/Conntrack-tcp.c
Normal file
@ -0,0 +1,532 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright (c) 2001 Daniel Hartmeier
|
||||||
|
* Copyright (c) 2002 - 2008 Henning Brauer
|
||||||
|
* Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
|
||||||
|
* Copyright (c) 2015, 2016 VMware, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following
|
||||||
|
* disclaimer in the documentation and/or other materials provided
|
||||||
|
* with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Effort sponsored in part by the Defense Advanced Research Projects
|
||||||
|
* Agency (DARPA) and Air Force Research Laboratory, Air Force
|
||||||
|
* Materiel Command, USAF, under agreement number F30602-01-2-0537.
|
||||||
|
*
|
||||||
|
* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Conntrack.h"
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
struct tcp_peer {
|
||||||
|
enum ct_dpif_tcp_state state;
|
||||||
|
uint32_t seqlo; /* Max sequence number sent */
|
||||||
|
uint32_t seqhi; /* Max the other end ACKd + win */
|
||||||
|
uint16_t max_win;/* largest window (pre scaling) */
|
||||||
|
uint8_t wscale; /* window scaling factor */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct conn_tcp {
|
||||||
|
struct OVS_CT_ENTRY up;
|
||||||
|
struct tcp_peer peer[2];
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
TCPOPT_EOL,
|
||||||
|
TCPOPT_NOP,
|
||||||
|
TCPOPT_WINDOW = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Given POINTER, the address of the given MEMBER in a STRUCT object, returns
|
||||||
|
the STRUCT object. */
|
||||||
|
#define CONTAINER_OF(POINTER, STRUCT, MEMBER) \
|
||||||
|
((STRUCT *) (void *) ((char *) (POINTER) - \
|
||||||
|
offsetof (STRUCT, MEMBER)))
|
||||||
|
|
||||||
|
|
||||||
|
/* TCP sequence numbers are 32 bit integers operated
|
||||||
|
* on with modular arithmetic. These macros can be
|
||||||
|
* used to compare such integers. */
|
||||||
|
#define SEQ_LT(a,b) ((int)((a)-(b)) < 0)
|
||||||
|
#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
|
||||||
|
#define SEQ_GT(a,b) ((int)((a)-(b)) > 0)
|
||||||
|
#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0)
|
||||||
|
|
||||||
|
#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
|
||||||
|
#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
|
||||||
|
|
||||||
|
#define TCP_FIN 0x001
|
||||||
|
#define TCP_SYN 0x002
|
||||||
|
#define TCP_RST 0x004
|
||||||
|
#define TCP_PSH 0x008
|
||||||
|
#define TCP_ACK 0x010
|
||||||
|
#define TCP_URG 0x020
|
||||||
|
#define TCP_ECE 0x040
|
||||||
|
#define TCP_CWR 0x080
|
||||||
|
#define TCP_NS 0x100
|
||||||
|
|
||||||
|
#define CT_DPIF_TCP_FLAGS \
|
||||||
|
CT_DPIF_TCP_FLAG(WINDOW_SCALE) \
|
||||||
|
CT_DPIF_TCP_FLAG(SACK_PERM) \
|
||||||
|
CT_DPIF_TCP_FLAG(CLOSE_INIT) \
|
||||||
|
CT_DPIF_TCP_FLAG(BE_LIBERAL) \
|
||||||
|
CT_DPIF_TCP_FLAG(DATA_UNACKNOWLEDGED) \
|
||||||
|
CT_DPIF_TCP_FLAG(MAXACK_SET) \
|
||||||
|
|
||||||
|
enum ct_dpif_tcp_flags_count_ {
|
||||||
|
#define CT_DPIF_TCP_FLAG(FLAG) FLAG##_COUNT_,
|
||||||
|
CT_DPIF_TCP_FLAGS
|
||||||
|
#undef CT_DPIF_TCP_FLAG
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ct_dpif_tcp_flags {
|
||||||
|
#define CT_DPIF_TCP_FLAG(FLAG) CT_DPIF_TCPF_##FLAG = (1 << \
|
||||||
|
FLAG##_COUNT_),
|
||||||
|
CT_DPIF_TCP_FLAGS
|
||||||
|
#undef CT_DPIF_TCP_FLAG
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define CT_DPIF_TCP_STATES \
|
||||||
|
CT_DPIF_TCP_STATE(CLOSED) \
|
||||||
|
CT_DPIF_TCP_STATE(LISTEN) \
|
||||||
|
CT_DPIF_TCP_STATE(SYN_SENT) \
|
||||||
|
CT_DPIF_TCP_STATE(SYN_RECV) \
|
||||||
|
CT_DPIF_TCP_STATE(ESTABLISHED) \
|
||||||
|
CT_DPIF_TCP_STATE(CLOSE_WAIT) \
|
||||||
|
CT_DPIF_TCP_STATE(FIN_WAIT_1) \
|
||||||
|
CT_DPIF_TCP_STATE(CLOSING) \
|
||||||
|
CT_DPIF_TCP_STATE(LAST_ACK) \
|
||||||
|
CT_DPIF_TCP_STATE(FIN_WAIT_2) \
|
||||||
|
CT_DPIF_TCP_STATE(TIME_WAIT)
|
||||||
|
|
||||||
|
enum ct_dpif_tcp_state {
|
||||||
|
#define CT_DPIF_TCP_STATE(STATE) CT_DPIF_TCPS_##STATE,
|
||||||
|
CT_DPIF_TCP_STATES
|
||||||
|
#undef CT_DPIF_TCP_STATE
|
||||||
|
};
|
||||||
|
|
||||||
|
#define TCP_MAX_WSCALE 14
|
||||||
|
#define CT_WSCALE_FLAG 0x80
|
||||||
|
#define CT_WSCALE_UNKNOWN 0x40
|
||||||
|
#define CT_WSCALE_MASK 0xf
|
||||||
|
|
||||||
|
/* pf does this in in pf_normalize_tcp(), and it is called only if scrub
|
||||||
|
* is enabled. We're not scrubbing, but this check seems reasonable. */
|
||||||
|
static __inline BOOLEAN
|
||||||
|
OvsConntrackValidateTcpFlags(const TCPHdr *tcp)
|
||||||
|
{
|
||||||
|
if (tcp->syn) {
|
||||||
|
if (tcp->rst) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
if (tcp->fin) {
|
||||||
|
/* Here pf removes the fin flag. We simply mark the packet as
|
||||||
|
* invalid */
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Illegal packet */
|
||||||
|
if (!(tcp->ack || tcp->rst)) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(tcp->ack)) {
|
||||||
|
/* These flags are only valid if ACK is set */
|
||||||
|
if ((tcp->fin) || (tcp->psh) || (tcp->urg)) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline uint8_t
|
||||||
|
OvsTcpGetWscale(const TCPHdr *tcp)
|
||||||
|
{
|
||||||
|
unsigned len = tcp->doff * 4 - sizeof *tcp;
|
||||||
|
const uint8_t *opt = (const uint8_t *)(tcp + 1);
|
||||||
|
uint8_t wscale = 0;
|
||||||
|
uint8_t optlen;
|
||||||
|
|
||||||
|
while (len >= 3) {
|
||||||
|
if (*opt == TCPOPT_EOL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (*opt) {
|
||||||
|
case TCPOPT_NOP:
|
||||||
|
opt++;
|
||||||
|
len--;
|
||||||
|
break;
|
||||||
|
case TCPOPT_WINDOW:
|
||||||
|
wscale = MIN(opt[2], TCP_MAX_WSCALE);
|
||||||
|
wscale |= CT_WSCALE_FLAG;
|
||||||
|
/* fall through */
|
||||||
|
default:
|
||||||
|
optlen = opt[2];
|
||||||
|
if (optlen < 2) {
|
||||||
|
optlen = 2;
|
||||||
|
}
|
||||||
|
len -= optlen;
|
||||||
|
opt += optlen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return wscale;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline uint32_t
|
||||||
|
OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
|
||||||
|
{
|
||||||
|
IPHdr *ipHdr;
|
||||||
|
char *ipBuf[sizeof(IPHdr)];
|
||||||
|
PNET_BUFFER curNb;
|
||||||
|
curNb = NET_BUFFER_LIST_FIRST_NB(nbl);
|
||||||
|
ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
|
||||||
|
1 /*no align*/, 0);
|
||||||
|
TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
|
||||||
|
return (UINT16)ntohs(ipHdr->tot_len)
|
||||||
|
- (ipHdr->ihl * 4)
|
||||||
|
- (sizeof * tcp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline void
|
||||||
|
OvsConntrackUpdateExpiration(struct conn_tcp *conn,
|
||||||
|
long long now,
|
||||||
|
long long interval)
|
||||||
|
{
|
||||||
|
conn->up.expiration = now + interval;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline struct conn_tcp*
|
||||||
|
OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
|
||||||
|
{
|
||||||
|
return CONTAINER_OF(conn, struct conn_tcp, up);
|
||||||
|
}
|
||||||
|
|
||||||
|
enum CT_UPDATE_RES
|
||||||
|
OvsConntrackUpdateTcpEntry(struct OVS_CT_ENTRY* conn_,
|
||||||
|
const TCPHdr *tcp,
|
||||||
|
PNET_BUFFER_LIST nbl,
|
||||||
|
BOOLEAN reply,
|
||||||
|
UINT64 now)
|
||||||
|
{
|
||||||
|
struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
|
||||||
|
/* The peer that sent 'pkt' */
|
||||||
|
struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
|
||||||
|
/* The peer that should receive 'pkt' */
|
||||||
|
struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
|
||||||
|
uint8_t sws = 0, dws = 0;
|
||||||
|
uint16_t win = ntohs(tcp->window);
|
||||||
|
uint32_t ack, end, seq, orig_seq;
|
||||||
|
uint32_t p_len = OvsGetTcpPayloadLength(nbl);
|
||||||
|
int ackskew;
|
||||||
|
|
||||||
|
if (OvsConntrackValidateTcpFlags(tcp)) {
|
||||||
|
return CT_UPDATE_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((tcp->syn) && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 &&
|
||||||
|
src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
|
||||||
|
src->state = dst->state = CT_DPIF_TCPS_CLOSED;
|
||||||
|
return CT_UPDATE_NEW;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src->wscale & CT_WSCALE_FLAG
|
||||||
|
&& dst->wscale & CT_WSCALE_FLAG
|
||||||
|
&& !(tcp->syn)) {
|
||||||
|
|
||||||
|
sws = src->wscale & CT_WSCALE_MASK;
|
||||||
|
dws = dst->wscale & CT_WSCALE_MASK;
|
||||||
|
|
||||||
|
} else if (src->wscale & CT_WSCALE_UNKNOWN
|
||||||
|
&& dst->wscale & CT_WSCALE_UNKNOWN
|
||||||
|
&& !(tcp->syn)) {
|
||||||
|
|
||||||
|
sws = TCP_MAX_WSCALE;
|
||||||
|
dws = TCP_MAX_WSCALE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sequence tracking algorithm from Guido van Rooij's paper:
|
||||||
|
* http://www.madison-gurkha.com/publications/tcp_filtering/
|
||||||
|
* tcp_filtering.ps
|
||||||
|
*/
|
||||||
|
|
||||||
|
orig_seq = seq = ntohl(tcp->seq);
|
||||||
|
if (src->state < CT_DPIF_TCPS_SYN_SENT) {
|
||||||
|
/* First packet from this end. Set its state */
|
||||||
|
|
||||||
|
ack = ntohl(tcp->ack);
|
||||||
|
|
||||||
|
end = seq + p_len;
|
||||||
|
if (tcp->syn) {
|
||||||
|
end++;
|
||||||
|
if (dst->wscale & CT_WSCALE_FLAG) {
|
||||||
|
src->wscale = OvsTcpGetWscale(tcp);
|
||||||
|
if (src->wscale & CT_WSCALE_FLAG) {
|
||||||
|
/* Remove scale factor from initial window */
|
||||||
|
sws = src->wscale & CT_WSCALE_MASK;
|
||||||
|
win = DIV_ROUND_UP((uint32_t) win, 1 << sws);
|
||||||
|
dws = dst->wscale & CT_WSCALE_MASK;
|
||||||
|
} else {
|
||||||
|
/* fixup other window */
|
||||||
|
dst->max_win <<= dst->wscale &
|
||||||
|
CT_WSCALE_MASK;
|
||||||
|
/* in case of a retrans SYN|ACK */
|
||||||
|
dst->wscale = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (tcp->fin) {
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
|
||||||
|
src->seqlo = seq;
|
||||||
|
src->state = CT_DPIF_TCPS_SYN_SENT;
|
||||||
|
/*
|
||||||
|
* May need to slide the window (seqhi may have been set by
|
||||||
|
* the crappy stack check or if we picked up the connection
|
||||||
|
* after establishment)
|
||||||
|
*/
|
||||||
|
if (src->seqhi == 1 ||
|
||||||
|
SEQ_GEQ(end + MAX(1, dst->max_win << dws),
|
||||||
|
src->seqhi)) {
|
||||||
|
src->seqhi = end + MAX(1, dst->max_win << dws);
|
||||||
|
}
|
||||||
|
if (win > src->max_win) {
|
||||||
|
src->max_win = win;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
ack = ntohl(tcp->ack);
|
||||||
|
end = seq + p_len;
|
||||||
|
if (tcp->syn) {
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
if (tcp->fin) {
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((tcp->ack) == 0) {
|
||||||
|
/* Let it pass through the ack skew check */
|
||||||
|
ack = dst->seqlo;
|
||||||
|
} else if ((ack == 0
|
||||||
|
&& (tcp->ack && tcp->rst) == (TCP_ACK|TCP_RST))
|
||||||
|
/* broken tcp stacks do not set ack */) {
|
||||||
|
/* Many stacks (ours included) will set the ACK number in an
|
||||||
|
* FIN|ACK if the SYN times out -- no sequence to ACK. */
|
||||||
|
ack = dst->seqlo;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seq == end) {
|
||||||
|
/* Ease sequencing restrictions on no data packets */
|
||||||
|
seq = src->seqlo;
|
||||||
|
end = seq;
|
||||||
|
}
|
||||||
|
|
||||||
|
ackskew = dst->seqlo - ack;
|
||||||
|
#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
|
||||||
|
if (SEQ_GEQ(src->seqhi, end)
|
||||||
|
/* Last octet inside other's window space */
|
||||||
|
&& SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))
|
||||||
|
/* Retrans: not more than one window back */
|
||||||
|
&& (ackskew >= -MAXACKWINDOW)
|
||||||
|
/* Acking not more than one reassembled fragment backwards */
|
||||||
|
&& (ackskew <= (MAXACKWINDOW << sws))
|
||||||
|
/* Acking not more than one window forward */
|
||||||
|
&& ((tcp->rst) == 0 || orig_seq == src->seqlo
|
||||||
|
|| (orig_seq == src->seqlo + 1)
|
||||||
|
|| (orig_seq + 1 == src->seqlo))) {
|
||||||
|
/* Require an exact/+1 sequence match on resets when possible */
|
||||||
|
|
||||||
|
/* update max window */
|
||||||
|
if (src->max_win < win) {
|
||||||
|
src->max_win = win;
|
||||||
|
}
|
||||||
|
/* synchronize sequencing */
|
||||||
|
if (SEQ_GT(end, src->seqlo)) {
|
||||||
|
src->seqlo = end;
|
||||||
|
}
|
||||||
|
/* slide the window of what the other end can send */
|
||||||
|
if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
|
||||||
|
dst->seqhi = ack + MAX((win << sws), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* update states */
|
||||||
|
if (tcp->syn && src->state < CT_DPIF_TCPS_SYN_SENT) {
|
||||||
|
src->state = CT_DPIF_TCPS_SYN_SENT;
|
||||||
|
}
|
||||||
|
if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
|
||||||
|
src->state = CT_DPIF_TCPS_CLOSING;
|
||||||
|
}
|
||||||
|
if (tcp->ack) {
|
||||||
|
if (dst->state == CT_DPIF_TCPS_SYN_SENT) {
|
||||||
|
dst->state = CT_DPIF_TCPS_ESTABLISHED;
|
||||||
|
} else if (dst->state == CT_DPIF_TCPS_CLOSING) {
|
||||||
|
dst->state = CT_DPIF_TCPS_FIN_WAIT_2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (tcp->rst) {
|
||||||
|
src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
|
||||||
|
&& dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
|
||||||
|
OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
|
||||||
|
} else if (src->state >= CT_DPIF_TCPS_CLOSING
|
||||||
|
&& dst->state >= CT_DPIF_TCPS_CLOSING) {
|
||||||
|
OvsConntrackUpdateExpiration(conn, now, 45 * 10000000LL);
|
||||||
|
} else if (src->state < CT_DPIF_TCPS_ESTABLISHED
|
||||||
|
|| dst->state < CT_DPIF_TCPS_ESTABLISHED) {
|
||||||
|
OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
|
||||||
|
} else if (src->state >= CT_DPIF_TCPS_CLOSING
|
||||||
|
|| dst->state >= CT_DPIF_TCPS_CLOSING) {
|
||||||
|
OvsConntrackUpdateExpiration(conn, now, 15 * 60 * 10000000LL);
|
||||||
|
} else {
|
||||||
|
OvsConntrackUpdateExpiration(conn, now, 24 * 60 * 60 * 10000000LL);
|
||||||
|
}
|
||||||
|
} else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
|
||||||
|
|| dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
|
||||||
|
|| src->state >= CT_DPIF_TCPS_FIN_WAIT_2)
|
||||||
|
&& SEQ_GEQ(src->seqhi + MAXACKWINDOW, end)
|
||||||
|
/* Within a window forward of the originating packet */
|
||||||
|
&& SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
|
||||||
|
/* Within a window backward of the originating packet */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This currently handles three situations:
|
||||||
|
* 1) Stupid stacks will shotgun SYNs before their peer
|
||||||
|
* replies.
|
||||||
|
* 2) When PF catches an already established stream (the
|
||||||
|
* firewall rebooted, the state table was flushed, routes
|
||||||
|
* changed...)
|
||||||
|
* 3) Packets get funky immediately after the connection
|
||||||
|
* closes (this should catch Solaris spurious ACK|FINs
|
||||||
|
* that web servers like to spew after a close)
|
||||||
|
*
|
||||||
|
* This must be a little more careful than the above code
|
||||||
|
* since packet floods will also be caught here. We don't
|
||||||
|
* update the TTL here to mitigate the damage of a packet
|
||||||
|
* flood and so the same code can handle awkward establishment
|
||||||
|
* and a loosened connection close.
|
||||||
|
* In the establishment case, a correct peer response will
|
||||||
|
* validate the connection, go through the normal state code
|
||||||
|
* and keep updating the state TTL.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* update max window */
|
||||||
|
if (src->max_win < win) {
|
||||||
|
src->max_win = win;
|
||||||
|
}
|
||||||
|
/* synchronize sequencing */
|
||||||
|
if (SEQ_GT(end, src->seqlo)) {
|
||||||
|
src->seqlo = end;
|
||||||
|
}
|
||||||
|
/* slide the window of what the other end can send */
|
||||||
|
if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
|
||||||
|
dst->seqhi = ack + MAX((win << sws), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cannot set dst->seqhi here since this could be a shotgunned
|
||||||
|
* SYN and not an already established connection.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
|
||||||
|
src->state = CT_DPIF_TCPS_CLOSING;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tcp->rst) {
|
||||||
|
src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return CT_UPDATE_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CT_UPDATE_VALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOLEAN
|
||||||
|
OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
|
||||||
|
{
|
||||||
|
if (tcp == NULL || OvsConntrackValidateTcpFlags(tcp)) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A syn+ack is not allowed to create a connection. We want to allow
|
||||||
|
* totally new connections (syn) or already established, not partially
|
||||||
|
* open (syn+ack). */
|
||||||
|
if ((tcp->syn) && (tcp->ack)) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
OVS_CT_ENTRY *
|
||||||
|
OvsNewTcpConntrack(const TCPHdr *tcp,
|
||||||
|
PNET_BUFFER_LIST nbl,
|
||||||
|
UINT64 now)
|
||||||
|
{
|
||||||
|
struct conn_tcp* newconn = NULL;
|
||||||
|
struct tcp_peer *src, *dst;
|
||||||
|
|
||||||
|
newconn = OvsAllocateMemoryWithTag(sizeof(struct conn_tcp),
|
||||||
|
OVS_CT_POOL_TAG);
|
||||||
|
newconn->up = (OVS_CT_ENTRY) {0};
|
||||||
|
src = &newconn->peer[0];
|
||||||
|
dst = &newconn->peer[1];
|
||||||
|
|
||||||
|
src->seqlo = ntohl(tcp->seq);
|
||||||
|
src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
|
||||||
|
|
||||||
|
if (tcp->syn) {
|
||||||
|
src->seqhi++;
|
||||||
|
src->wscale = OvsTcpGetWscale(tcp);
|
||||||
|
} else {
|
||||||
|
src->wscale = CT_WSCALE_UNKNOWN;
|
||||||
|
dst->wscale = CT_WSCALE_UNKNOWN;
|
||||||
|
}
|
||||||
|
src->max_win = MAX(ntohs(tcp->window), 1);
|
||||||
|
if (src->wscale & CT_WSCALE_MASK) {
|
||||||
|
/* Remove scale factor from initial window */
|
||||||
|
uint8_t sws = src->wscale & CT_WSCALE_MASK;
|
||||||
|
src->max_win = DIV_ROUND_UP((uint32_t) src->max_win,
|
||||||
|
1 << sws);
|
||||||
|
}
|
||||||
|
if (tcp->fin) {
|
||||||
|
src->seqhi++;
|
||||||
|
}
|
||||||
|
dst->seqhi = 1;
|
||||||
|
dst->max_win = 1;
|
||||||
|
src->state = CT_DPIF_TCPS_SYN_SENT;
|
||||||
|
dst->state = CT_DPIF_TCPS_CLOSED;
|
||||||
|
|
||||||
|
OvsConntrackUpdateExpiration(newconn, now, CT_ENTRY_TIMEOUT);
|
||||||
|
|
||||||
|
return &newconn->up;
|
||||||
|
}
|
530
datapath-windows/ovsext/Conntrack.c
Normal file
530
datapath-windows/ovsext/Conntrack.c
Normal file
@ -0,0 +1,530 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, 2016 VMware, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef OVS_DBG_MOD
|
||||||
|
#undef OVS_DBG_MOD
|
||||||
|
#endif
|
||||||
|
#define OVS_DBG_MOD OVS_DBG_CONTRK
|
||||||
|
|
||||||
|
#include "Conntrack.h"
|
||||||
|
#include "Jhash.h"
|
||||||
|
#include "PacketParser.h"
|
||||||
|
#include "Debug.h"
|
||||||
|
|
||||||
|
typedef struct _OVS_CT_THREAD_CTX {
|
||||||
|
KEVENT event;
|
||||||
|
PVOID threadObject;
|
||||||
|
UINT32 exit;
|
||||||
|
} OVS_CT_THREAD_CTX, *POVS_CT_THREAD_CTX;
|
||||||
|
|
||||||
|
KSTART_ROUTINE ovsConntrackEntryCleaner;
|
||||||
|
static PLIST_ENTRY ovsConntrackTable;
|
||||||
|
static OVS_CT_THREAD_CTX ctThreadCtx;
|
||||||
|
static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
|
||||||
|
|
||||||
|
/*
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
* OvsInitConntrack
|
||||||
|
* Initialize the components used by Connection Tracking
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
NTSTATUS
|
||||||
|
OvsInitConntrack(POVS_SWITCH_CONTEXT context)
|
||||||
|
{
|
||||||
|
NTSTATUS status;
|
||||||
|
HANDLE threadHandle = NULL;
|
||||||
|
|
||||||
|
/* Init the sync-lock */
|
||||||
|
ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
|
||||||
|
if (ovsConntrackLockObj == NULL) {
|
||||||
|
return STATUS_INSUFFICIENT_RESOURCES;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Init the Hash Buffer */
|
||||||
|
ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
|
||||||
|
* CT_HASH_TABLE_SIZE,
|
||||||
|
OVS_CT_POOL_TAG);
|
||||||
|
if (ovsConntrackTable == NULL) {
|
||||||
|
NdisFreeRWLock(ovsConntrackLockObj);
|
||||||
|
ovsConntrackLockObj = NULL;
|
||||||
|
return STATUS_INSUFFICIENT_RESOURCES;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
|
||||||
|
InitializeListHead(&ovsConntrackTable[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Init CT Cleaner Thread */
|
||||||
|
KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
|
||||||
|
status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
|
||||||
|
NULL, ovsConntrackEntryCleaner,
|
||||||
|
&ctThreadCtx);
|
||||||
|
|
||||||
|
if (status != STATUS_SUCCESS) {
|
||||||
|
NdisFreeRWLock(ovsConntrackLockObj);
|
||||||
|
ovsConntrackLockObj = NULL;
|
||||||
|
|
||||||
|
OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
|
||||||
|
ovsConntrackTable = NULL;
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
|
||||||
|
&ctThreadCtx.threadObject, NULL);
|
||||||
|
ZwClose(threadHandle);
|
||||||
|
threadHandle = NULL;
|
||||||
|
return STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
* OvsCleanupConntrack
|
||||||
|
* Cleanup memory and thread that were spawned for Connection tracking
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
VOID
|
||||||
|
OvsCleanupConntrack(VOID)
|
||||||
|
{
|
||||||
|
LOCK_STATE_EX lockState;
|
||||||
|
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
|
||||||
|
ctThreadCtx.exit = 1;
|
||||||
|
KeSetEvent(&ctThreadCtx.event, 0, FALSE);
|
||||||
|
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
|
||||||
|
|
||||||
|
KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
|
||||||
|
KernelMode, FALSE, NULL);
|
||||||
|
ObDereferenceObject(ctThreadCtx.threadObject);
|
||||||
|
|
||||||
|
if (ovsConntrackTable) {
|
||||||
|
OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
|
||||||
|
ovsConntrackTable = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
NdisFreeRWLock(ovsConntrackLockObj);
|
||||||
|
ovsConntrackLockObj = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline VOID
|
||||||
|
OvsCtKeyReverse(OVS_CT_KEY *key)
|
||||||
|
{
|
||||||
|
struct ct_endpoint tmp;
|
||||||
|
tmp = key->src;
|
||||||
|
key->src = key->dst;
|
||||||
|
key->dst = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline VOID
|
||||||
|
OvsCtUpdateFlowKey(struct OvsFlowKey *key,
|
||||||
|
UINT32 state,
|
||||||
|
UINT16 zone,
|
||||||
|
UINT32 mark,
|
||||||
|
struct ovs_key_ct_labels *labels)
|
||||||
|
{
|
||||||
|
key->ct.state = state | OVS_CS_F_TRACKED;
|
||||||
|
key->ct.zone = zone;
|
||||||
|
key->ct.mark = mark;
|
||||||
|
if (labels) {
|
||||||
|
NdisMoveMemory(&key->ct.labels, labels,
|
||||||
|
sizeof(struct ovs_key_ct_labels));
|
||||||
|
} else {
|
||||||
|
memset(&key->ct.labels, 0,
|
||||||
|
sizeof(struct ovs_key_ct_labels));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline POVS_CT_ENTRY
|
||||||
|
OvsCtEntryCreate(const TCPHdr *tcp,
|
||||||
|
PNET_BUFFER_LIST curNbl,
|
||||||
|
OvsConntrackKeyLookupCtx *ctx,
|
||||||
|
OvsFlowKey *key,
|
||||||
|
BOOLEAN commit,
|
||||||
|
UINT64 currentTime)
|
||||||
|
{
|
||||||
|
POVS_CT_ENTRY entry = NULL;
|
||||||
|
UINT32 state = 0;
|
||||||
|
if (!OvsConntrackValidateTcpPacket(tcp)) {
|
||||||
|
state |= OVS_CS_F_INVALID;
|
||||||
|
OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
state |= OVS_CS_F_NEW;
|
||||||
|
if (commit) {
|
||||||
|
entry = OvsNewTcpConntrack(tcp, curNbl, currentTime);
|
||||||
|
NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
|
||||||
|
NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
|
||||||
|
OvsCtKeyReverse(&entry->rev_key);
|
||||||
|
InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
|
||||||
|
&entry->link);
|
||||||
|
}
|
||||||
|
|
||||||
|
OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline VOID
|
||||||
|
OvsCtEntryDelete(POVS_CT_ENTRY entry)
|
||||||
|
{
|
||||||
|
RemoveEntryList(&entry->link);
|
||||||
|
OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline BOOLEAN
|
||||||
|
OvsCtEntryExpired(POVS_CT_ENTRY entry)
|
||||||
|
{
|
||||||
|
if (entry == NULL)
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
UINT64 currentTime;
|
||||||
|
NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
|
||||||
|
return entry->expiration < currentTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline NDIS_STATUS
|
||||||
|
OvsDetectCtPacket(OvsFlowKey *key)
|
||||||
|
{
|
||||||
|
/* Currently we support only Unfragmented TCP packets */
|
||||||
|
switch (ntohs(key->l2.dlType)) {
|
||||||
|
case ETH_TYPE_IPV4:
|
||||||
|
if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
|
||||||
|
return NDIS_STATUS_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
if (key->ipKey.nwProto != IPPROTO_TCP) {
|
||||||
|
return NDIS_STATUS_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
return NDIS_STATUS_SUCCESS;
|
||||||
|
case ETH_TYPE_IPV6:
|
||||||
|
return NDIS_STATUS_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NDIS_STATUS_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline BOOLEAN
|
||||||
|
OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
|
||||||
|
{
|
||||||
|
return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
|
||||||
|
(ctxKey.src.addr.ipv4_aligned == entryKey.src.addr.ipv4_aligned) &&
|
||||||
|
(ctxKey.src.port == entryKey.src.port) &&
|
||||||
|
(ctxKey.dst.addr.ipv4 == entryKey.dst.addr.ipv4) &&
|
||||||
|
(ctxKey.dst.addr.ipv4_aligned == entryKey.dst.addr.ipv4_aligned) &&
|
||||||
|
(ctxKey.dst.port == entryKey.dst.port) &&
|
||||||
|
(ctxKey.dl_type == entryKey.dl_type) &&
|
||||||
|
(ctxKey.nw_proto == entryKey.nw_proto) &&
|
||||||
|
(ctxKey.zone == entryKey.zone));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline POVS_CT_ENTRY
|
||||||
|
OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
|
||||||
|
{
|
||||||
|
PLIST_ENTRY link;
|
||||||
|
POVS_CT_ENTRY entry;
|
||||||
|
BOOLEAN reply = FALSE;
|
||||||
|
POVS_CT_ENTRY found = NULL;
|
||||||
|
|
||||||
|
LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
|
||||||
|
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
|
||||||
|
|
||||||
|
if (OvsCtKeyAreSame(ctx->key,entry->key)) {
|
||||||
|
found = entry;
|
||||||
|
reply = FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
|
||||||
|
found = entry;
|
||||||
|
reply = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found) {
|
||||||
|
if (OvsCtEntryExpired(found)) {
|
||||||
|
found = NULL;
|
||||||
|
} else {
|
||||||
|
ctx->reply = reply;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->entry = found;
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline VOID
|
||||||
|
OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
|
||||||
|
UINT16 zone,
|
||||||
|
OvsConntrackKeyLookupCtx *ctx)
|
||||||
|
{
|
||||||
|
UINT32 hsrc, hdst,hash;
|
||||||
|
|
||||||
|
ctx->key.zone = zone;
|
||||||
|
ctx->key.dl_type = flowKey->l2.dlType;
|
||||||
|
|
||||||
|
if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
|
||||||
|
ctx->key.src.addr.ipv4 = flowKey->ipKey.nwSrc;
|
||||||
|
ctx->key.dst.addr.ipv4 = flowKey->ipKey.nwDst;
|
||||||
|
ctx->key.nw_proto = flowKey->ipKey.nwProto;
|
||||||
|
|
||||||
|
ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
|
||||||
|
ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
|
||||||
|
} else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) {
|
||||||
|
ctx->key.src.addr.ipv6 = flowKey->ipv6Key.ipv6Src;
|
||||||
|
ctx->key.dst.addr.ipv6 = flowKey->ipv6Key.ipv6Dst;
|
||||||
|
ctx->key.nw_proto = flowKey->ipv6Key.nwProto;
|
||||||
|
|
||||||
|
ctx->key.src.port = flowKey->ipv6Key.l4.tpSrc;
|
||||||
|
ctx->key.dst.port = flowKey->ipv6Key.l4.tpDst;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Related bit is set for ICMP and FTP (Not supported)*/
|
||||||
|
ctx->related = FALSE;
|
||||||
|
|
||||||
|
hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
|
||||||
|
hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
|
||||||
|
hash = hsrc ^ hdst; /* TO identify reverse traffic */
|
||||||
|
ctx->hash = OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
|
||||||
|
((uint32_t *) (&ctx->key + 1) -
|
||||||
|
(uint32_t *) (&ctx->key.dst + 1)),
|
||||||
|
hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
* OvsProcessConntrackEntry
|
||||||
|
* Check the TCP flags and set the ct_state of the entry
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
static __inline POVS_CT_ENTRY
|
||||||
|
OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
|
||||||
|
const TCPHdr *tcp,
|
||||||
|
OvsConntrackKeyLookupCtx *ctx,
|
||||||
|
OvsFlowKey *key,
|
||||||
|
UINT16 zone,
|
||||||
|
BOOLEAN commit,
|
||||||
|
UINT64 currentTime)
|
||||||
|
{
|
||||||
|
POVS_CT_ENTRY entry = ctx->entry;
|
||||||
|
UINT32 state = 0;
|
||||||
|
|
||||||
|
/* If an entry was found, update the state based on TCP flags */
|
||||||
|
if (ctx->related) {
|
||||||
|
state |= OVS_CS_F_RELATED;
|
||||||
|
if (ctx->reply) {
|
||||||
|
state = OVS_CS_F_REPLY_DIR;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
CT_UPDATE_RES result;
|
||||||
|
result = OvsConntrackUpdateTcpEntry(entry, tcp, curNbl,
|
||||||
|
ctx->reply, currentTime);
|
||||||
|
switch (result) {
|
||||||
|
case CT_UPDATE_VALID:
|
||||||
|
state |= OVS_CS_F_ESTABLISHED;
|
||||||
|
if (ctx->reply) {
|
||||||
|
state |= OVS_CS_F_REPLY_DIR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CT_UPDATE_INVALID:
|
||||||
|
state |= OVS_CS_F_INVALID;
|
||||||
|
break;
|
||||||
|
case CT_UPDATE_NEW:
|
||||||
|
//Delete and update the Conntrack
|
||||||
|
OvsCtEntryDelete(ctx->entry);
|
||||||
|
ctx->entry = NULL;
|
||||||
|
entry = OvsCtEntryCreate(tcp, curNbl, ctx, key,
|
||||||
|
commit, currentTime);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Copy mark and label from entry into flowKey. If actions specify
|
||||||
|
different mark and label, update the flowKey. */
|
||||||
|
OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline VOID
|
||||||
|
OvsConntrackSetMark(OvsFlowKey *key,
|
||||||
|
POVS_CT_ENTRY entry,
|
||||||
|
UINT32 value,
|
||||||
|
UINT32 mask)
|
||||||
|
{
|
||||||
|
UINT32 newMark;
|
||||||
|
newMark = value | (entry->mark & ~(mask));
|
||||||
|
if (entry->mark != newMark) {
|
||||||
|
entry->mark = newMark;
|
||||||
|
key->ct.mark = newMark;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline void
|
||||||
|
OvsConntrackSetLabels(OvsFlowKey *key,
|
||||||
|
POVS_CT_ENTRY entry,
|
||||||
|
struct ovs_key_ct_labels *val,
|
||||||
|
struct ovs_key_ct_labels *mask)
|
||||||
|
{
|
||||||
|
ovs_u128 v, m, pktMdLabel;
|
||||||
|
memcpy(&v, val, sizeof v);
|
||||||
|
memcpy(&m, mask, sizeof m);
|
||||||
|
|
||||||
|
pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
|
||||||
|
pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
|
||||||
|
|
||||||
|
NdisMoveMemory(&entry->labels, &pktMdLabel,
|
||||||
|
sizeof(struct ovs_key_ct_labels));
|
||||||
|
NdisMoveMemory(&key->ct.labels, &pktMdLabel,
|
||||||
|
sizeof(struct ovs_key_ct_labels));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline NDIS_STATUS
|
||||||
|
OvsCtExecute_(PNET_BUFFER_LIST curNbl,
|
||||||
|
OvsFlowKey *key,
|
||||||
|
OVS_PACKET_HDR_INFO *layers,
|
||||||
|
BOOLEAN commit,
|
||||||
|
UINT16 zone,
|
||||||
|
MD_MARK *mark,
|
||||||
|
MD_LABELS *labels)
|
||||||
|
{
|
||||||
|
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
|
||||||
|
POVS_CT_ENTRY entry = NULL;
|
||||||
|
OvsConntrackKeyLookupCtx ctx = { 0 };
|
||||||
|
TCPHdr tcpStorage;
|
||||||
|
UINT64 currentTime;
|
||||||
|
LOCK_STATE_EX lockState;
|
||||||
|
const TCPHdr *tcp;
|
||||||
|
tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
|
||||||
|
NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime);
|
||||||
|
|
||||||
|
/* Retrieve the Conntrack Key related fields from packet */
|
||||||
|
OvsCtSetupLookupCtx(key, zone, &ctx);
|
||||||
|
|
||||||
|
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
|
||||||
|
|
||||||
|
/* Lookup Conntrack entries for a matching entry */
|
||||||
|
entry = OvsCtLookup(&ctx);
|
||||||
|
|
||||||
|
if (!entry) {
|
||||||
|
/* If no matching entry was found, create one and add New state */
|
||||||
|
entry = OvsCtEntryCreate(tcp, curNbl, &ctx,
|
||||||
|
key, commit, currentTime);
|
||||||
|
} else {
|
||||||
|
/* Process the entry and update CT flags */
|
||||||
|
entry = OvsProcessConntrackEntry(curNbl, tcp, &ctx, key,
|
||||||
|
zone, commit, currentTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entry && mark) {
|
||||||
|
OvsConntrackSetMark(key, entry, mark->value, mark->mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entry && labels) {
|
||||||
|
OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*---------------------------------------------------------------------------
|
||||||
|
* OvsExecuteConntrackAction
|
||||||
|
* Executes Conntrack actions XXX - Add more
|
||||||
|
*---------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
NDIS_STATUS
|
||||||
|
OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
|
||||||
|
OVS_PACKET_HDR_INFO *layers,
|
||||||
|
OvsFlowKey *key,
|
||||||
|
const PNL_ATTR a)
|
||||||
|
{
|
||||||
|
PNL_ATTR ctAttr;
|
||||||
|
BOOLEAN commit = FALSE;
|
||||||
|
UINT16 zone = 0;
|
||||||
|
MD_MARK *mark = NULL;
|
||||||
|
MD_LABELS *labels = NULL;
|
||||||
|
NDIS_STATUS status;
|
||||||
|
|
||||||
|
status = OvsDetectCtPacket(key);
|
||||||
|
if (status != NDIS_STATUS_SUCCESS) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
|
||||||
|
if (ctAttr) {
|
||||||
|
zone = NlAttrGetU16(ctAttr);
|
||||||
|
}
|
||||||
|
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
|
||||||
|
if (ctAttr) {
|
||||||
|
commit = TRUE;
|
||||||
|
}
|
||||||
|
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
|
||||||
|
if (ctAttr) {
|
||||||
|
mark = NlAttrGet(ctAttr);
|
||||||
|
}
|
||||||
|
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
|
||||||
|
if (ctAttr) {
|
||||||
|
labels = NlAttrGet(ctAttr);
|
||||||
|
}
|
||||||
|
|
||||||
|
status = OvsCtExecute_(curNbl, key, layers,
|
||||||
|
commit, zone, mark, labels);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
* OvsConntrackEnrtyCleaner
|
||||||
|
* Runs periodically and cleans up the connection tracker
|
||||||
|
*----------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
VOID
|
||||||
|
ovsConntrackEntryCleaner(PVOID data)
|
||||||
|
{
|
||||||
|
|
||||||
|
POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
|
||||||
|
PLIST_ENTRY link, next;
|
||||||
|
POVS_CT_ENTRY entry;
|
||||||
|
BOOLEAN success = TRUE;
|
||||||
|
|
||||||
|
while (success) {
|
||||||
|
LOCK_STATE_EX lockState;
|
||||||
|
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
|
||||||
|
if (context->exit) {
|
||||||
|
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the timeout for the thread and cleanup */
|
||||||
|
UINT64 currentTime, threadSleepTimeout;
|
||||||
|
NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
|
||||||
|
threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
|
||||||
|
|
||||||
|
for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
|
||||||
|
LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
|
||||||
|
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
|
||||||
|
if (entry->expiration < currentTime) {
|
||||||
|
OvsCtEntryDelete(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
|
||||||
|
KeWaitForSingleObject(&context->event, Executive, KernelMode,
|
||||||
|
FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
PsTerminateSystemThread(STATUS_SUCCESS);
|
||||||
|
}
|
102
datapath-windows/ovsext/Conntrack.h
Normal file
102
datapath-windows/ovsext/Conntrack.h
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, 2016 VMware, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __OVS_CONNTRACK_H_
#define __OVS_CONNTRACK_H_ 1

#include "precomp.h"
#include "Flow.h"

/* Network-layer address of one side of a connection (IPv4 or IPv6). */
struct ct_addr {
    union {
        ovs_be32 ipv4;
        struct in6_addr ipv6;
        uint32_t ipv4_aligned;
        struct in6_addr ipv6_aligned;
    };
};

/* One side of a connection: address plus transport port.  The whole
 * structure, including 'pad', is hashed bytewise when a lookup context is
 * set up. */
struct ct_endpoint {
    struct ct_addr addr;
    ovs_be16 port;
    UINT16 pad;
};

/* Outcome of updating an existing conntrack entry with a new packet. */
typedef enum CT_UPDATE_RES {
    CT_UPDATE_INVALID,  /* Packet is reported with the INVALID state. */
    CT_UPDATE_VALID,    /* Packet is reported with the ESTABLISHED state. */
    CT_UPDATE_NEW,      /* Entry is deleted and a new one is created. */
} CT_UPDATE_RES;

/* Metadata mark for masked write to conntrack mark */
typedef struct MD_MARK {
    UINT32 value;
    UINT32 mask;
} MD_MARK;

/* Metadata label for masked write to conntrack label. */
typedef struct MD_LABELS {
    struct ovs_key_ct_labels value;
    struct ovs_key_ct_labels mask;
} MD_LABELS;

/* Key that identifies one direction of a tracked connection. */
typedef struct _OVS_CT_KEY {
    struct ct_endpoint src;
    struct ct_endpoint dst;
    UINT16 dl_type;     /* Copied from the flow key's l2.dlType. */
    UINT8 nw_proto;     /* IP protocol taken from the flow key. */
    UINT16 zone;        /* Conntrack zone. */
} OVS_CT_KEY, *POVS_CT_KEY;

/* A tracked connection, linked into one bucket of the conntrack table. */
typedef struct OVS_CT_ENTRY {
    OVS_CT_KEY key;         /* Compared first by lookups (forward match). */
    OVS_CT_KEY rev_key;     /* A match on this key is treated as a reply. */
    UINT64 expiration;      /* Compared against the current system time. */
    LIST_ENTRY link;        /* Bucket list linkage. */
    UINT32 mark;
    struct ovs_key_ct_labels labels;
} OVS_CT_ENTRY, *POVS_CT_ENTRY;

/* Per-packet lookup state: the packet's key, its hash and lookup results. */
typedef struct OvsConntrackKeyLookupCtx {
    OVS_CT_KEY key;
    POVS_CT_ENTRY entry;    /* Entry found by the lookup, or NULL. */
    UINT32 hash;            /* Masked with CT_HASH_TABLE_MASK to pick a bucket. */
    BOOLEAN reply;          /* TRUE when the reverse key matched. */
    BOOLEAN related;
} OvsConntrackKeyLookupCtx;

/* Number of hash buckets (a power of two) and the matching index mask. */
#define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
#define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)

/* Intervals added to values obtained from NdisGetCurrentSystemTime(). */
#define CT_ENTRY_TIMEOUT (2 * 600000000)    /* 2m */
#define CT_CLEANUP_INTERVAL (2 * 600000000) /* 2m */

VOID OvsCleanupConntrack(VOID);
NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context);

NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
                                      OVS_PACKET_HDR_INFO *layers,
                                      OvsFlowKey *key,
                                      const PNL_ATTR a);

BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);

POVS_CT_ENTRY OvsNewTcpConntrack(const TCPHdr *tcp,
                                 PNET_BUFFER_LIST nbl,
                                 UINT64 now);

CT_UPDATE_RES OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY *conn_,
                                         const TCPHdr *tcp,
                                         PNET_BUFFER_LIST nbl,
                                         BOOLEAN reply,
                                         UINT64 now);

#endif /* __OVS_CONNTRACK_H_ */
|
@ -40,6 +40,7 @@
|
|||||||
#define OVS_DBG_NETLINK BIT32(20)
|
#define OVS_DBG_NETLINK BIT32(20)
|
||||||
#define OVS_DBG_TUNFLT BIT32(21)
|
#define OVS_DBG_TUNFLT BIT32(21)
|
||||||
#define OVS_DBG_STT BIT32(22)
|
#define OVS_DBG_STT BIT32(22)
|
||||||
|
#define OVS_DBG_CONTRK BIT32(23)
|
||||||
|
|
||||||
#define OVS_DBG_RESERVED BIT32(31)
|
#define OVS_DBG_RESERVED BIT32(31)
|
||||||
//Please add above OVS_DBG_RESERVED.
|
//Please add above OVS_DBG_RESERVED.
|
||||||
|
@ -167,6 +167,13 @@ typedef __declspec(align(8)) struct OvsFlowKey {
|
|||||||
};
|
};
|
||||||
UINT32 recircId; /* Recirculation ID. */
|
UINT32 recircId; /* Recirculation ID. */
|
||||||
UINT32 dpHash; /* Datapath calculated hash value. */
|
UINT32 dpHash; /* Datapath calculated hash value. */
|
||||||
|
struct {
|
||||||
|
/* Connection tracking fields. */
|
||||||
|
UINT16 zone;
|
||||||
|
UINT32 mark;
|
||||||
|
UINT32 state;
|
||||||
|
struct ovs_key_ct_labels labels;
|
||||||
|
} ct; /* Connection Tracking Flags */
|
||||||
} OvsFlowKey;
|
} OvsFlowKey;
|
||||||
|
|
||||||
#define OVS_WIN_TUNNEL_KEY_SIZE (sizeof (OvsIPv4TunnelKey))
|
#define OVS_WIN_TUNNEL_KEY_SIZE (sizeof (OvsIPv4TunnelKey))
|
||||||
|
@ -172,7 +172,17 @@ const NL_POLICY nlFlowKeyPolicy[] = {
|
|||||||
.maxLen = 4, .optional = TRUE},
|
.maxLen = 4, .optional = TRUE},
|
||||||
[OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
|
[OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
|
||||||
.maxLen = 4, .optional = TRUE},
|
.maxLen = 4, .optional = TRUE},
|
||||||
[OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE}
|
[OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE},
|
||||||
|
[OVS_KEY_ATTR_CT_STATE] = {.type = NL_A_UNSPEC, .minLen = 4,
|
||||||
|
.maxLen = 4, .optional = TRUE},
|
||||||
|
[OVS_KEY_ATTR_CT_ZONE] = {.type = NL_A_UNSPEC, .minLen = 2,
|
||||||
|
.maxLen = 2, .optional = TRUE},
|
||||||
|
[OVS_KEY_ATTR_CT_MARK] = {.type = NL_A_UNSPEC, .minLen = 4,
|
||||||
|
.maxLen = 4, .optional = TRUE},
|
||||||
|
[OVS_KEY_ATTR_CT_LABELS] = {.type = NL_A_UNSPEC,
|
||||||
|
.minLen = sizeof(struct ovs_key_ct_labels),
|
||||||
|
.maxLen = sizeof(struct ovs_key_ct_labels),
|
||||||
|
.optional = TRUE}
|
||||||
};
|
};
|
||||||
const UINT32 nlFlowKeyPolicyLen = ARRAY_SIZE(nlFlowKeyPolicy);
|
const UINT32 nlFlowKeyPolicyLen = ARRAY_SIZE(nlFlowKeyPolicy);
|
||||||
|
|
||||||
@ -229,7 +239,8 @@ const NL_POLICY nlFlowActionPolicy[] = {
|
|||||||
.maxLen = sizeof(struct ovs_action_hash),
|
.maxLen = sizeof(struct ovs_action_hash),
|
||||||
.optional = TRUE},
|
.optional = TRUE},
|
||||||
[OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
|
[OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
|
||||||
[OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE}
|
[OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE},
|
||||||
|
[OVS_ACTION_ATTR_CT] = {.type = NL_A_VAR_LEN, .optional = TRUE}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -850,6 +861,28 @@ MapFlowKeyToNlKey(PNL_BUFFER nlBuf,
|
|||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_STATE,
|
||||||
|
flowKey->ct.state)) {
|
||||||
|
rc = STATUS_UNSUCCESSFUL;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
if (!NlMsgPutTailU16(nlBuf, OVS_KEY_ATTR_CT_ZONE,
|
||||||
|
flowKey->ct.zone)) {
|
||||||
|
rc = STATUS_UNSUCCESSFUL;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_MARK,
|
||||||
|
flowKey->ct.mark)) {
|
||||||
|
rc = STATUS_UNSUCCESSFUL;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
if (!NlMsgPutTailUnspec(nlBuf, OVS_KEY_ATTR_CT_LABELS,
|
||||||
|
(PCHAR)(&flowKey->ct.labels),
|
||||||
|
sizeof(struct ovs_key_ct_labels))) {
|
||||||
|
rc = STATUS_UNSUCCESSFUL;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (flowKey->dpHash) {
|
if (flowKey->dpHash) {
|
||||||
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_DP_HASH,
|
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_DP_HASH,
|
||||||
flowKey->dpHash)) {
|
flowKey->dpHash)) {
|
||||||
@ -1386,6 +1419,24 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
|
|||||||
destKey->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
|
destKey->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
|
||||||
|
destKey->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
|
||||||
|
destKey->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
|
||||||
|
destKey->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
|
||||||
|
const struct ovs_key_ct_labels *ct_labels;
|
||||||
|
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
|
||||||
|
RtlCopyMemory(&destKey->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
|
||||||
|
}
|
||||||
|
|
||||||
/* ===== L2 headers ===== */
|
/* ===== L2 headers ===== */
|
||||||
destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
|
destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
|
||||||
|
|
||||||
@ -1774,6 +1825,24 @@ OvsGetFlowMetadata(OvsFlowKey *key,
|
|||||||
key->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
|
key->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
|
||||||
|
key->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
|
||||||
|
key->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
|
||||||
|
key->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
|
||||||
|
const struct ovs_key_ct_labels *ct_labels;
|
||||||
|
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
|
||||||
|
RtlCopyMemory(&key->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
|
||||||
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2059,6 +2128,11 @@ FlowEqual(OvsFlow *srcFlow,
|
|||||||
srcFlow->key.l2.val == dstKey->l2.val &&
|
srcFlow->key.l2.val == dstKey->l2.val &&
|
||||||
srcFlow->key.recircId == dstKey->recircId &&
|
srcFlow->key.recircId == dstKey->recircId &&
|
||||||
srcFlow->key.dpHash == dstKey->dpHash &&
|
srcFlow->key.dpHash == dstKey->dpHash &&
|
||||||
|
srcFlow->key.ct.state == dstKey->ct.state &&
|
||||||
|
srcFlow->key.ct.zone == dstKey->ct.zone &&
|
||||||
|
srcFlow->key.ct.mark == dstKey->ct.mark &&
|
||||||
|
!memcmp(&srcFlow->key.ct.labels, &dstKey->ct.labels,
|
||||||
|
sizeof(struct ovs_key_ct_labels)) &&
|
||||||
FlowMemoryEqual((UINT64 *)((UINT8 *)&srcFlow->key + offset),
|
FlowMemoryEqual((UINT64 *)((UINT8 *)&srcFlow->key + offset),
|
||||||
(UINT64 *) dstStart,
|
(UINT64 *) dstStart,
|
||||||
size));
|
size));
|
||||||
@ -2156,6 +2230,21 @@ OvsLookupFlow(OVS_DATAPATH *datapath,
|
|||||||
if (key->dpHash) {
|
if (key->dpHash) {
|
||||||
*hash = OvsJhashWords((UINT32*)hash, 1, key->dpHash);
|
*hash = OvsJhashWords((UINT32*)hash, 1, key->dpHash);
|
||||||
}
|
}
|
||||||
|
if (key->ct.state) {
|
||||||
|
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.state);
|
||||||
|
}
|
||||||
|
if (key->ct.zone) {
|
||||||
|
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone);
|
||||||
|
}
|
||||||
|
if (key->ct.mark) {
|
||||||
|
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone);
|
||||||
|
}
|
||||||
|
if (key->ct.labels.ct_labels) {
|
||||||
|
UINT32 lblHash = OvsJhashBytes(&key->ct.labels,
|
||||||
|
sizeof(struct ovs_key_ct_labels),
|
||||||
|
0);
|
||||||
|
*hash = OvsJhashWords((UINT32*)hash, 1, lblHash);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
head = &datapath->flowTable[HASH_BUCKET(*hash)];
|
head = &datapath->flowTable[HASH_BUCKET(*hash)];
|
||||||
@ -2322,6 +2411,12 @@ ReportFlowInfo(OvsFlow *flow,
|
|||||||
|
|
||||||
info->key.recircId = flow->key.recircId;
|
info->key.recircId = flow->key.recircId;
|
||||||
info->key.dpHash = flow->key.dpHash;
|
info->key.dpHash = flow->key.dpHash;
|
||||||
|
info->key.ct.state = flow->key.ct.state;
|
||||||
|
info->key.ct.zone = flow->key.ct.zone;
|
||||||
|
info->key.ct.mark = flow->key.ct.mark;
|
||||||
|
NdisMoveMemory(&info->key.ct.labels,
|
||||||
|
&flow->key.ct.labels,
|
||||||
|
sizeof(struct ovs_key_ct_labels));
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
@ -2578,6 +2673,10 @@ OvsFlowKeyAttrSize(void)
|
|||||||
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_SKB_MARK */
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_SKB_MARK */
|
||||||
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_DP_HASH */
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_DP_HASH */
|
||||||
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_RECIRC_ID */
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_RECIRC_ID */
|
||||||
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_CT_STATE */
|
||||||
|
+ NlAttrTotalSize(2) /* OVS_KEY_ATTR_CT_ZONE */
|
||||||
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_CT_MARK */
|
||||||
|
+ NlAttrTotalSize(16) /* OVS_KEY_ATTR_CT_LABELS */
|
||||||
+ NlAttrTotalSize(12) /* OVS_KEY_ATTR_ETHERNET */
|
+ NlAttrTotalSize(12) /* OVS_KEY_ATTR_ETHERNET */
|
||||||
+ NlAttrTotalSize(2) /* OVS_KEY_ATTR_ETHERTYPE */
|
+ NlAttrTotalSize(2) /* OVS_KEY_ATTR_ETHERTYPE */
|
||||||
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_VLAN */
|
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_VLAN */
|
||||||
@ -2657,6 +2756,31 @@ OvsProbeSupportedFeature(POVS_MESSAGE msgIn,
|
|||||||
OVS_LOG_ERROR("Invalid recirculation ID.");
|
OVS_LOG_ERROR("Invalid recirculation ID.");
|
||||||
status = STATUS_INVALID_PARAMETER;
|
status = STATUS_INVALID_PARAMETER;
|
||||||
}
|
}
|
||||||
|
} else if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
|
||||||
|
UINT32 state = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]);
|
||||||
|
if (state & OVS_CS_F_DST_NAT || state & OVS_CS_F_SRC_NAT) {
|
||||||
|
status = STATUS_INVALID_PARAMETER;
|
||||||
|
OVS_LOG_ERROR("Contrack NAT is not supported:%d", state);
|
||||||
|
}
|
||||||
|
} else if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
|
||||||
|
UINT16 zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
|
||||||
|
if (!zone) {
|
||||||
|
OVS_LOG_ERROR("Invalid zone specified.");
|
||||||
|
status = STATUS_INVALID_PARAMETER;
|
||||||
|
}
|
||||||
|
} else if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
|
||||||
|
UINT32 mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
|
||||||
|
if (!mark) {
|
||||||
|
OVS_LOG_ERROR("Invalid ct mark specified.");
|
||||||
|
status = STATUS_INVALID_PARAMETER;
|
||||||
|
}
|
||||||
|
} else if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
|
||||||
|
const struct ovs_key_ct_labels *ct_labels;
|
||||||
|
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
|
||||||
|
if (!ct_labels->ct_labels) {
|
||||||
|
OVS_LOG_ERROR("Invalid ct label specified.");
|
||||||
|
status = STATUS_INVALID_PARAMETER;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
OVS_LOG_ERROR("Feature not supported.");
|
OVS_LOG_ERROR("Feature not supported.");
|
||||||
status = STATUS_INVALID_PARAMETER;
|
status = STATUS_INVALID_PARAMETER;
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "precomp.h"
|
#include "precomp.h"
|
||||||
|
#include "Conntrack.h"
|
||||||
#include "Switch.h"
|
#include "Switch.h"
|
||||||
#include "Vport.h"
|
#include "Vport.h"
|
||||||
#include "Event.h"
|
#include "Event.h"
|
||||||
@ -218,6 +218,13 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
|
|||||||
goto create_switch_done;
|
goto create_switch_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status = OvsInitConntrack(switchContext);
|
||||||
|
if (status != STATUS_SUCCESS) {
|
||||||
|
OvsUninitSwitchContext(switchContext);
|
||||||
|
OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
|
||||||
|
goto create_switch_done;
|
||||||
|
}
|
||||||
|
|
||||||
*switchContextOut = switchContext;
|
*switchContextOut = switchContext;
|
||||||
|
|
||||||
create_switch_done:
|
create_switch_done:
|
||||||
@ -249,6 +256,7 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
|
|||||||
OvsDeleteSwitch(switchContext);
|
OvsDeleteSwitch(switchContext);
|
||||||
OvsCleanupIpHelper();
|
OvsCleanupIpHelper();
|
||||||
OvsCleanupSttDefragmentation();
|
OvsCleanupSttDefragmentation();
|
||||||
|
OvsCleanupConntrack();
|
||||||
/* This completes the cleanup, and a new attach can be handled now. */
|
/* This completes the cleanup, and a new attach can be handled now. */
|
||||||
|
|
||||||
OVS_LOG_TRACE("Exit: OvsDetach Successfully");
|
OVS_LOG_TRACE("Exit: OvsDetach Successfully");
|
||||||
|
@ -28,6 +28,12 @@ typedef uint64 __u64, __be64;
|
|||||||
typedef uint32 __u32, __be32;
|
typedef uint32 __u32, __be32;
|
||||||
typedef uint16 __u16, __be16;
|
typedef uint16 __u16, __be16;
|
||||||
typedef uint8 __u8;
|
typedef uint8 __u8;
|
||||||
|
typedef union ovs_u128 {
|
||||||
|
uint32_t u32[4];
|
||||||
|
struct {
|
||||||
|
uint64_t lo, hi;
|
||||||
|
} u64;
|
||||||
|
} ovs_u128;
|
||||||
|
|
||||||
/* Defines the userspace specific data types for file
|
/* Defines the userspace specific data types for file
|
||||||
* included within kernel only. */
|
* included within kernel only. */
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#define OVS_GRE_POOL_TAG 'GSVO'
|
#define OVS_GRE_POOL_TAG 'GSVO'
|
||||||
#define OVS_TUNFLT_POOL_TAG 'WSVO'
|
#define OVS_TUNFLT_POOL_TAG 'WSVO'
|
||||||
#define OVS_RECIRC_POOL_TAG 'CSVO'
|
#define OVS_RECIRC_POOL_TAG 'CSVO'
|
||||||
|
#define OVS_CT_POOL_TAG 'CTVO'
|
||||||
|
|
||||||
VOID *OvsAllocateMemory(size_t size);
|
VOID *OvsAllocateMemory(size_t size);
|
||||||
VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
|
VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
|
||||||
@ -68,7 +69,7 @@ VOID OvsFreeAlignedMemory(VOID *ptr);
|
|||||||
|
|
||||||
VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
|
VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
|
||||||
|
|
||||||
|
#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
|
||||||
#define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
|
#define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
|
||||||
#define ARRAY_SIZE(_x) ((sizeof(_x))/sizeof (_x)[0])
|
#define ARRAY_SIZE(_x) ((sizeof(_x))/sizeof (_x)[0])
|
||||||
#define OVS_SWITCH_PORT_ID_INVALID (NDIS_SWITCH_PORT_ID)(-1)
|
#define OVS_SWITCH_PORT_ID_INVALID (NDIS_SWITCH_PORT_ID)(-1)
|
||||||
|
@ -74,6 +74,7 @@
|
|||||||
<ClInclude Include="Actions.h" />
|
<ClInclude Include="Actions.h" />
|
||||||
<ClInclude Include="Atomic.h" />
|
<ClInclude Include="Atomic.h" />
|
||||||
<ClInclude Include="BufferMgmt.h" />
|
<ClInclude Include="BufferMgmt.h" />
|
||||||
|
<ClInclude Include="Conntrack.h" />
|
||||||
<ClInclude Include="Datapath.h" />
|
<ClInclude Include="Datapath.h" />
|
||||||
<ClInclude Include="Debug.h" />
|
<ClInclude Include="Debug.h" />
|
||||||
<ClInclude Include="DpInternal.h" />
|
<ClInclude Include="DpInternal.h" />
|
||||||
@ -175,6 +176,8 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="Actions.c" />
|
<ClCompile Include="Actions.c" />
|
||||||
<ClCompile Include="BufferMgmt.c" />
|
<ClCompile Include="BufferMgmt.c" />
|
||||||
|
<ClCompile Include="Conntrack-tcp.c" />
|
||||||
|
<ClCompile Include="Conntrack.c" />
|
||||||
<ClCompile Include="Debug.c" />
|
<ClCompile Include="Debug.c" />
|
||||||
<ClCompile Include="Driver.c" />
|
<ClCompile Include="Driver.c" />
|
||||||
<ClCompile Include="Event.c" />
|
<ClCompile Include="Event.c" />
|
||||||
|
1
debian/copyright.in
vendored
1
debian/copyright.in
vendored
@ -89,6 +89,7 @@ License:
|
|||||||
|
|
||||||
lib/getopt_long.c
|
lib/getopt_long.c
|
||||||
include/windows/getopt.h
|
include/windows/getopt.h
|
||||||
|
datapath-windows/ovsext/Conntrack-tcp.c
|
||||||
|
|
||||||
* The following files are licensed under the 3-clause BSD-license
|
* The following files are licensed under the 3-clause BSD-license
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user