2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

datapath-windows: Add Connection Tracking Support

Enable support for Stateful Firewall in Hyper-V by adding a Connection
Tracking module. The module has been ported over from the userspace
implementation patch of a similar name.

The current version of the module supports ct - zone, mark and label for
TCP packets. Support for other packet formats will be added in subsequent
patches.

The conntrack-tcp module is adapted from FreeBSD's pf subsystem and hence
the BSD license. It has been ported over to match OVS Hyper-V coding
style.

Signed-off-by: Sairam Venugopal <vsairam@vmware.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Co-Authored-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Nithin Raju <nithin@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Sairam Venugopal 2016-04-13 11:54:03 -07:00 committed by Ben Pfaff
parent ce05810425
commit 792d377d83
14 changed files with 1350 additions and 4 deletions

5
NOTICE
View File

@ -38,3 +38,8 @@ Copyright (c) 2008, 2009, 2010 Sten Spans <sten@blinkenlights.nl>
Auto Attach implementation
Copyright (c) 2014, 2015 WindRiver, Inc
Copyright (c) 2014, 2015 Avaya, Inc
TCP connection tracker from FreeBSD pf, BSD licensed
Copyright (c) 2001 Daniel Hartmeier
Copyright (c) 2002 - 2008 Henning Brauer
Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>

View File

@ -13,6 +13,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/Atomic.h \
datapath-windows/ovsext/BufferMgmt.c \
datapath-windows/ovsext/BufferMgmt.h \
datapath-windows/ovsext/Conntrack-tcp.c \
datapath-windows/ovsext/Conntrack.c \
datapath-windows/ovsext/Conntrack.h \
datapath-windows/ovsext/Datapath.c \
datapath-windows/ovsext/Datapath.h \
datapath-windows/ovsext/Debug.c \

View File

@ -17,6 +17,7 @@
#include "precomp.h"
#include "Actions.h"
#include "Conntrack.h"
#include "Debug.h"
#include "Event.h"
#include "Flow.h"
@ -1786,6 +1787,28 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
break;
}
case OVS_ACTION_ATTR_CT:
{
if (ovsFwdCtx.destPortsSizeOut > 0
|| ovsFwdCtx.tunnelTxNic != NULL
|| ovsFwdCtx.tunnelRxNic != NULL) {
status = OvsOutputBeforeSetAction(&ovsFwdCtx);
if (status != NDIS_STATUS_SUCCESS) {
dropReason = L"OVS-adding destination failed";
goto dropit;
}
}
status = OvsExecuteConntrackAction(ovsFwdCtx.curNbl, layers,
key, (const PNL_ATTR)a);
if (status != NDIS_STATUS_SUCCESS) {
OVS_LOG_ERROR("CT Action failed");
dropReason = L"OVS-conntrack action failed";
goto dropit;
}
break;
}
case OVS_ACTION_ATTR_RECIRC:
{
if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL

View File

@ -0,0 +1,532 @@
/*-
* Copyright (c) 2001 Daniel Hartmeier
* Copyright (c) 2002 - 2008 Henning Brauer
* Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2015, 2016 VMware, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Effort sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F30602-01-2-0537.
*
* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
*/
#include "Conntrack.h"
#include <stddef.h>
struct tcp_peer {
enum ct_dpif_tcp_state state;
uint32_t seqlo; /* Max sequence number sent */
uint32_t seqhi; /* Max the other end ACKd + win */
uint16_t max_win;/* largest window (pre scaling) */
uint8_t wscale; /* window scaling factor */
};
struct conn_tcp {
struct OVS_CT_ENTRY up;
struct tcp_peer peer[2];
};
enum {
TCPOPT_EOL,
TCPOPT_NOP,
TCPOPT_WINDOW = 3,
};
/* Given POINTER, the address of the given MEMBER in a STRUCT object, returns
the STRUCT object. */
#define CONTAINER_OF(POINTER, STRUCT, MEMBER) \
((STRUCT *) (void *) ((char *) (POINTER) - \
offsetof (STRUCT, MEMBER)))
/* TCP sequence numbers are 32 bit integers operated
* on with modular arithmetic. These macros can be
* used to compare such integers. */
#define SEQ_LT(a,b) ((int)((a)-(b)) < 0)
#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define SEQ_GT(a,b) ((int)((a)-(b)) > 0)
#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0)
#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
#define TCP_FIN 0x001
#define TCP_SYN 0x002
#define TCP_RST 0x004
#define TCP_PSH 0x008
#define TCP_ACK 0x010
#define TCP_URG 0x020
#define TCP_ECE 0x040
#define TCP_CWR 0x080
#define TCP_NS 0x100
#define CT_DPIF_TCP_FLAGS \
CT_DPIF_TCP_FLAG(WINDOW_SCALE) \
CT_DPIF_TCP_FLAG(SACK_PERM) \
CT_DPIF_TCP_FLAG(CLOSE_INIT) \
CT_DPIF_TCP_FLAG(BE_LIBERAL) \
CT_DPIF_TCP_FLAG(DATA_UNACKNOWLEDGED) \
CT_DPIF_TCP_FLAG(MAXACK_SET) \
enum ct_dpif_tcp_flags_count_ {
#define CT_DPIF_TCP_FLAG(FLAG) FLAG##_COUNT_,
CT_DPIF_TCP_FLAGS
#undef CT_DPIF_TCP_FLAG
};
enum ct_dpif_tcp_flags {
#define CT_DPIF_TCP_FLAG(FLAG) CT_DPIF_TCPF_##FLAG = (1 << \
FLAG##_COUNT_),
CT_DPIF_TCP_FLAGS
#undef CT_DPIF_TCP_FLAG
};
#define CT_DPIF_TCP_STATES \
CT_DPIF_TCP_STATE(CLOSED) \
CT_DPIF_TCP_STATE(LISTEN) \
CT_DPIF_TCP_STATE(SYN_SENT) \
CT_DPIF_TCP_STATE(SYN_RECV) \
CT_DPIF_TCP_STATE(ESTABLISHED) \
CT_DPIF_TCP_STATE(CLOSE_WAIT) \
CT_DPIF_TCP_STATE(FIN_WAIT_1) \
CT_DPIF_TCP_STATE(CLOSING) \
CT_DPIF_TCP_STATE(LAST_ACK) \
CT_DPIF_TCP_STATE(FIN_WAIT_2) \
CT_DPIF_TCP_STATE(TIME_WAIT)
enum ct_dpif_tcp_state {
#define CT_DPIF_TCP_STATE(STATE) CT_DPIF_TCPS_##STATE,
CT_DPIF_TCP_STATES
#undef CT_DPIF_TCP_STATE
};
#define TCP_MAX_WSCALE 14
#define CT_WSCALE_FLAG 0x80
#define CT_WSCALE_UNKNOWN 0x40
#define CT_WSCALE_MASK 0xf
/* pf does this in in pf_normalize_tcp(), and it is called only if scrub
* is enabled. We're not scrubbing, but this check seems reasonable. */
static __inline BOOLEAN
OvsConntrackValidateTcpFlags(const TCPHdr *tcp)
{
if (tcp->syn) {
if (tcp->rst) {
return TRUE;
}
if (tcp->fin) {
/* Here pf removes the fin flag. We simply mark the packet as
* invalid */
return TRUE;
}
} else {
/* Illegal packet */
if (!(tcp->ack || tcp->rst)) {
return TRUE;
}
}
if (!(tcp->ack)) {
/* These flags are only valid if ACK is set */
if ((tcp->fin) || (tcp->psh) || (tcp->urg)) {
return TRUE;
}
}
return FALSE;
}
static __inline uint8_t
OvsTcpGetWscale(const TCPHdr *tcp)
{
unsigned len = tcp->doff * 4 - sizeof *tcp;
const uint8_t *opt = (const uint8_t *)(tcp + 1);
uint8_t wscale = 0;
uint8_t optlen;
while (len >= 3) {
if (*opt == TCPOPT_EOL) {
break;
}
switch (*opt) {
case TCPOPT_NOP:
opt++;
len--;
break;
case TCPOPT_WINDOW:
wscale = MIN(opt[2], TCP_MAX_WSCALE);
wscale |= CT_WSCALE_FLAG;
/* fall through */
default:
optlen = opt[2];
if (optlen < 2) {
optlen = 2;
}
len -= optlen;
opt += optlen;
}
}
return wscale;
}
static __inline uint32_t
OvsGetTcpPayloadLength(PNET_BUFFER_LIST nbl)
{
IPHdr *ipHdr;
char *ipBuf[sizeof(IPHdr)];
PNET_BUFFER curNb;
curNb = NET_BUFFER_LIST_FIRST_NB(nbl);
ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
1 /*no align*/, 0);
TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
return (UINT16)ntohs(ipHdr->tot_len)
- (ipHdr->ihl * 4)
- (sizeof * tcp);
}
static __inline void
OvsConntrackUpdateExpiration(struct conn_tcp *conn,
long long now,
long long interval)
{
conn->up.expiration = now + interval;
}
static __inline struct conn_tcp*
OvsCastConntrackEntryToTcpEntry(OVS_CT_ENTRY* conn)
{
return CONTAINER_OF(conn, struct conn_tcp, up);
}
enum CT_UPDATE_RES
OvsConntrackUpdateTcpEntry(struct OVS_CT_ENTRY* conn_,
const TCPHdr *tcp,
PNET_BUFFER_LIST nbl,
BOOLEAN reply,
UINT64 now)
{
struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
/* The peer that sent 'pkt' */
struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
/* The peer that should receive 'pkt' */
struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
uint8_t sws = 0, dws = 0;
uint16_t win = ntohs(tcp->window);
uint32_t ack, end, seq, orig_seq;
uint32_t p_len = OvsGetTcpPayloadLength(nbl);
int ackskew;
if (OvsConntrackValidateTcpFlags(tcp)) {
return CT_UPDATE_INVALID;
}
if ((tcp->syn) && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 &&
src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
src->state = dst->state = CT_DPIF_TCPS_CLOSED;
return CT_UPDATE_NEW;
}
if (src->wscale & CT_WSCALE_FLAG
&& dst->wscale & CT_WSCALE_FLAG
&& !(tcp->syn)) {
sws = src->wscale & CT_WSCALE_MASK;
dws = dst->wscale & CT_WSCALE_MASK;
} else if (src->wscale & CT_WSCALE_UNKNOWN
&& dst->wscale & CT_WSCALE_UNKNOWN
&& !(tcp->syn)) {
sws = TCP_MAX_WSCALE;
dws = TCP_MAX_WSCALE;
}
/*
* Sequence tracking algorithm from Guido van Rooij's paper:
* http://www.madison-gurkha.com/publications/tcp_filtering/
* tcp_filtering.ps
*/
orig_seq = seq = ntohl(tcp->seq);
if (src->state < CT_DPIF_TCPS_SYN_SENT) {
/* First packet from this end. Set its state */
ack = ntohl(tcp->ack);
end = seq + p_len;
if (tcp->syn) {
end++;
if (dst->wscale & CT_WSCALE_FLAG) {
src->wscale = OvsTcpGetWscale(tcp);
if (src->wscale & CT_WSCALE_FLAG) {
/* Remove scale factor from initial window */
sws = src->wscale & CT_WSCALE_MASK;
win = DIV_ROUND_UP((uint32_t) win, 1 << sws);
dws = dst->wscale & CT_WSCALE_MASK;
} else {
/* fixup other window */
dst->max_win <<= dst->wscale &
CT_WSCALE_MASK;
/* in case of a retrans SYN|ACK */
dst->wscale = 0;
}
}
}
if (tcp->fin) {
end++;
}
src->seqlo = seq;
src->state = CT_DPIF_TCPS_SYN_SENT;
/*
* May need to slide the window (seqhi may have been set by
* the crappy stack check or if we picked up the connection
* after establishment)
*/
if (src->seqhi == 1 ||
SEQ_GEQ(end + MAX(1, dst->max_win << dws),
src->seqhi)) {
src->seqhi = end + MAX(1, dst->max_win << dws);
}
if (win > src->max_win) {
src->max_win = win;
}
} else {
ack = ntohl(tcp->ack);
end = seq + p_len;
if (tcp->syn) {
end++;
}
if (tcp->fin) {
end++;
}
}
if ((tcp->ack) == 0) {
/* Let it pass through the ack skew check */
ack = dst->seqlo;
} else if ((ack == 0
&& (tcp->ack && tcp->rst) == (TCP_ACK|TCP_RST))
/* broken tcp stacks do not set ack */) {
/* Many stacks (ours included) will set the ACK number in an
* FIN|ACK if the SYN times out -- no sequence to ACK. */
ack = dst->seqlo;
}
if (seq == end) {
/* Ease sequencing restrictions on no data packets */
seq = src->seqlo;
end = seq;
}
ackskew = dst->seqlo - ack;
#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
if (SEQ_GEQ(src->seqhi, end)
/* Last octet inside other's window space */
&& SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))
/* Retrans: not more than one window back */
&& (ackskew >= -MAXACKWINDOW)
/* Acking not more than one reassembled fragment backwards */
&& (ackskew <= (MAXACKWINDOW << sws))
/* Acking not more than one window forward */
&& ((tcp->rst) == 0 || orig_seq == src->seqlo
|| (orig_seq == src->seqlo + 1)
|| (orig_seq + 1 == src->seqlo))) {
/* Require an exact/+1 sequence match on resets when possible */
/* update max window */
if (src->max_win < win) {
src->max_win = win;
}
/* synchronize sequencing */
if (SEQ_GT(end, src->seqlo)) {
src->seqlo = end;
}
/* slide the window of what the other end can send */
if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
dst->seqhi = ack + MAX((win << sws), 1);
}
/* update states */
if (tcp->syn && src->state < CT_DPIF_TCPS_SYN_SENT) {
src->state = CT_DPIF_TCPS_SYN_SENT;
}
if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
src->state = CT_DPIF_TCPS_CLOSING;
}
if (tcp->ack) {
if (dst->state == CT_DPIF_TCPS_SYN_SENT) {
dst->state = CT_DPIF_TCPS_ESTABLISHED;
} else if (dst->state == CT_DPIF_TCPS_CLOSING) {
dst->state = CT_DPIF_TCPS_FIN_WAIT_2;
}
}
if (tcp->rst) {
src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
}
if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
&& dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
} else if (src->state >= CT_DPIF_TCPS_CLOSING
&& dst->state >= CT_DPIF_TCPS_CLOSING) {
OvsConntrackUpdateExpiration(conn, now, 45 * 10000000LL);
} else if (src->state < CT_DPIF_TCPS_ESTABLISHED
|| dst->state < CT_DPIF_TCPS_ESTABLISHED) {
OvsConntrackUpdateExpiration(conn, now, 30 * 10000000LL);
} else if (src->state >= CT_DPIF_TCPS_CLOSING
|| dst->state >= CT_DPIF_TCPS_CLOSING) {
OvsConntrackUpdateExpiration(conn, now, 15 * 60 * 10000000LL);
} else {
OvsConntrackUpdateExpiration(conn, now, 24 * 60 * 60 * 10000000LL);
}
} else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
|| dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
|| src->state >= CT_DPIF_TCPS_FIN_WAIT_2)
&& SEQ_GEQ(src->seqhi + MAXACKWINDOW, end)
/* Within a window forward of the originating packet */
&& SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
/* Within a window backward of the originating packet */
/*
* This currently handles three situations:
* 1) Stupid stacks will shotgun SYNs before their peer
* replies.
* 2) When PF catches an already established stream (the
* firewall rebooted, the state table was flushed, routes
* changed...)
* 3) Packets get funky immediately after the connection
* closes (this should catch Solaris spurious ACK|FINs
* that web servers like to spew after a close)
*
* This must be a little more careful than the above code
* since packet floods will also be caught here. We don't
* update the TTL here to mitigate the damage of a packet
* flood and so the same code can handle awkward establishment
* and a loosened connection close.
* In the establishment case, a correct peer response will
* validate the connection, go through the normal state code
* and keep updating the state TTL.
*/
/* update max window */
if (src->max_win < win) {
src->max_win = win;
}
/* synchronize sequencing */
if (SEQ_GT(end, src->seqlo)) {
src->seqlo = end;
}
/* slide the window of what the other end can send */
if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
dst->seqhi = ack + MAX((win << sws), 1);
}
/*
* Cannot set dst->seqhi here since this could be a shotgunned
* SYN and not an already established connection.
*/
if (tcp->fin && src->state < CT_DPIF_TCPS_CLOSING) {
src->state = CT_DPIF_TCPS_CLOSING;
}
if (tcp->rst) {
src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
}
} else {
return CT_UPDATE_INVALID;
}
return CT_UPDATE_VALID;
}
BOOLEAN
OvsConntrackValidateTcpPacket(const TCPHdr *tcp)
{
if (tcp == NULL || OvsConntrackValidateTcpFlags(tcp)) {
return FALSE;
}
/* A syn+ack is not allowed to create a connection. We want to allow
* totally new connections (syn) or already established, not partially
* open (syn+ack). */
if ((tcp->syn) && (tcp->ack)) {
return FALSE;
}
return TRUE;
}
OVS_CT_ENTRY *
OvsNewTcpConntrack(const TCPHdr *tcp,
PNET_BUFFER_LIST nbl,
UINT64 now)
{
struct conn_tcp* newconn = NULL;
struct tcp_peer *src, *dst;
newconn = OvsAllocateMemoryWithTag(sizeof(struct conn_tcp),
OVS_CT_POOL_TAG);
newconn->up = (OVS_CT_ENTRY) {0};
src = &newconn->peer[0];
dst = &newconn->peer[1];
src->seqlo = ntohl(tcp->seq);
src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;
if (tcp->syn) {
src->seqhi++;
src->wscale = OvsTcpGetWscale(tcp);
} else {
src->wscale = CT_WSCALE_UNKNOWN;
dst->wscale = CT_WSCALE_UNKNOWN;
}
src->max_win = MAX(ntohs(tcp->window), 1);
if (src->wscale & CT_WSCALE_MASK) {
/* Remove scale factor from initial window */
uint8_t sws = src->wscale & CT_WSCALE_MASK;
src->max_win = DIV_ROUND_UP((uint32_t) src->max_win,
1 << sws);
}
if (tcp->fin) {
src->seqhi++;
}
dst->seqhi = 1;
dst->max_win = 1;
src->state = CT_DPIF_TCPS_SYN_SENT;
dst->state = CT_DPIF_TCPS_CLOSED;
OvsConntrackUpdateExpiration(newconn, now, CT_ENTRY_TIMEOUT);
return &newconn->up;
}

View File

@ -0,0 +1,530 @@
/*
* Copyright (c) 2015, 2016 VMware, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef OVS_DBG_MOD
#undef OVS_DBG_MOD
#endif
#define OVS_DBG_MOD OVS_DBG_CONTRK
#include "Conntrack.h"
#include "Jhash.h"
#include "PacketParser.h"
#include "Debug.h"
typedef struct _OVS_CT_THREAD_CTX {
KEVENT event;
PVOID threadObject;
UINT32 exit;
} OVS_CT_THREAD_CTX, *POVS_CT_THREAD_CTX;
KSTART_ROUTINE ovsConntrackEntryCleaner;
static PLIST_ENTRY ovsConntrackTable;
static OVS_CT_THREAD_CTX ctThreadCtx;
static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
/*
*----------------------------------------------------------------------------
* OvsInitConntrack
* Initialize the components used by Connection Tracking
*----------------------------------------------------------------------------
*/
NTSTATUS
OvsInitConntrack(POVS_SWITCH_CONTEXT context)
{
NTSTATUS status;
HANDLE threadHandle = NULL;
/* Init the sync-lock */
ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
if (ovsConntrackLockObj == NULL) {
return STATUS_INSUFFICIENT_RESOURCES;
}
/* Init the Hash Buffer */
ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
* CT_HASH_TABLE_SIZE,
OVS_CT_POOL_TAG);
if (ovsConntrackTable == NULL) {
NdisFreeRWLock(ovsConntrackLockObj);
ovsConntrackLockObj = NULL;
return STATUS_INSUFFICIENT_RESOURCES;
}
for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
InitializeListHead(&ovsConntrackTable[i]);
}
/* Init CT Cleaner Thread */
KeInitializeEvent(&ctThreadCtx.event, NotificationEvent, FALSE);
status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
NULL, ovsConntrackEntryCleaner,
&ctThreadCtx);
if (status != STATUS_SUCCESS) {
NdisFreeRWLock(ovsConntrackLockObj);
ovsConntrackLockObj = NULL;
OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
ovsConntrackTable = NULL;
return status;
}
ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
&ctThreadCtx.threadObject, NULL);
ZwClose(threadHandle);
threadHandle = NULL;
return STATUS_SUCCESS;
}
/*
*----------------------------------------------------------------------------
* OvsCleanupConntrack
* Cleanup memory and thread that were spawned for Connection tracking
*----------------------------------------------------------------------------
*/
VOID
OvsCleanupConntrack(VOID)
{
LOCK_STATE_EX lockState;
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
ctThreadCtx.exit = 1;
KeSetEvent(&ctThreadCtx.event, 0, FALSE);
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
KernelMode, FALSE, NULL);
ObDereferenceObject(ctThreadCtx.threadObject);
if (ovsConntrackTable) {
OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
ovsConntrackTable = NULL;
}
NdisFreeRWLock(ovsConntrackLockObj);
ovsConntrackLockObj = NULL;
}
static __inline VOID
OvsCtKeyReverse(OVS_CT_KEY *key)
{
struct ct_endpoint tmp;
tmp = key->src;
key->src = key->dst;
key->dst = tmp;
}
static __inline VOID
OvsCtUpdateFlowKey(struct OvsFlowKey *key,
UINT32 state,
UINT16 zone,
UINT32 mark,
struct ovs_key_ct_labels *labels)
{
key->ct.state = state | OVS_CS_F_TRACKED;
key->ct.zone = zone;
key->ct.mark = mark;
if (labels) {
NdisMoveMemory(&key->ct.labels, labels,
sizeof(struct ovs_key_ct_labels));
} else {
memset(&key->ct.labels, 0,
sizeof(struct ovs_key_ct_labels));
}
}
static __inline POVS_CT_ENTRY
OvsCtEntryCreate(const TCPHdr *tcp,
PNET_BUFFER_LIST curNbl,
OvsConntrackKeyLookupCtx *ctx,
OvsFlowKey *key,
BOOLEAN commit,
UINT64 currentTime)
{
POVS_CT_ENTRY entry = NULL;
UINT32 state = 0;
if (!OvsConntrackValidateTcpPacket(tcp)) {
state |= OVS_CS_F_INVALID;
OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
return entry;
}
state |= OVS_CS_F_NEW;
if (commit) {
entry = OvsNewTcpConntrack(tcp, curNbl, currentTime);
NdisMoveMemory(&entry->key, &ctx->key, sizeof (OVS_CT_KEY));
NdisMoveMemory(&entry->rev_key, &ctx->key, sizeof (OVS_CT_KEY));
OvsCtKeyReverse(&entry->rev_key);
InsertHeadList(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK],
&entry->link);
}
OvsCtUpdateFlowKey(key, state, ctx->key.zone, 0, NULL);
return entry;
}
static __inline VOID
OvsCtEntryDelete(POVS_CT_ENTRY entry)
{
RemoveEntryList(&entry->link);
OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
}
static __inline BOOLEAN
OvsCtEntryExpired(POVS_CT_ENTRY entry)
{
if (entry == NULL)
return TRUE;
UINT64 currentTime;
NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
return entry->expiration < currentTime;
}
static __inline NDIS_STATUS
OvsDetectCtPacket(OvsFlowKey *key)
{
/* Currently we support only Unfragmented TCP packets */
switch (ntohs(key->l2.dlType)) {
case ETH_TYPE_IPV4:
if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
return NDIS_STATUS_NOT_SUPPORTED;
}
if (key->ipKey.nwProto != IPPROTO_TCP) {
return NDIS_STATUS_NOT_SUPPORTED;
}
return NDIS_STATUS_SUCCESS;
case ETH_TYPE_IPV6:
return NDIS_STATUS_NOT_SUPPORTED;
}
return NDIS_STATUS_NOT_SUPPORTED;
}
static __inline BOOLEAN
OvsCtKeyAreSame(OVS_CT_KEY ctxKey, OVS_CT_KEY entryKey)
{
return ((ctxKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
(ctxKey.src.addr.ipv4_aligned == entryKey.src.addr.ipv4_aligned) &&
(ctxKey.src.port == entryKey.src.port) &&
(ctxKey.dst.addr.ipv4 == entryKey.dst.addr.ipv4) &&
(ctxKey.dst.addr.ipv4_aligned == entryKey.dst.addr.ipv4_aligned) &&
(ctxKey.dst.port == entryKey.dst.port) &&
(ctxKey.dl_type == entryKey.dl_type) &&
(ctxKey.nw_proto == entryKey.nw_proto) &&
(ctxKey.zone == entryKey.zone));
}
static __inline POVS_CT_ENTRY
OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
{
PLIST_ENTRY link;
POVS_CT_ENTRY entry;
BOOLEAN reply = FALSE;
POVS_CT_ENTRY found = NULL;
LIST_FORALL(&ovsConntrackTable[ctx->hash & CT_HASH_TABLE_MASK], link) {
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
if (OvsCtKeyAreSame(ctx->key,entry->key)) {
found = entry;
reply = FALSE;
break;
}
if (OvsCtKeyAreSame(ctx->key,entry->rev_key)) {
found = entry;
reply = TRUE;
break;
}
}
if (found) {
if (OvsCtEntryExpired(found)) {
found = NULL;
} else {
ctx->reply = reply;
}
}
ctx->entry = found;
return found;
}
static __inline VOID
OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
UINT16 zone,
OvsConntrackKeyLookupCtx *ctx)
{
UINT32 hsrc, hdst,hash;
ctx->key.zone = zone;
ctx->key.dl_type = flowKey->l2.dlType;
if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
ctx->key.src.addr.ipv4 = flowKey->ipKey.nwSrc;
ctx->key.dst.addr.ipv4 = flowKey->ipKey.nwDst;
ctx->key.nw_proto = flowKey->ipKey.nwProto;
ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
} else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) {
ctx->key.src.addr.ipv6 = flowKey->ipv6Key.ipv6Src;
ctx->key.dst.addr.ipv6 = flowKey->ipv6Key.ipv6Dst;
ctx->key.nw_proto = flowKey->ipv6Key.nwProto;
ctx->key.src.port = flowKey->ipv6Key.l4.tpSrc;
ctx->key.dst.port = flowKey->ipv6Key.l4.tpDst;
}
/* Related bit is set for ICMP and FTP (Not supported)*/
ctx->related = FALSE;
hsrc = OvsJhashBytes((UINT32*) &ctx->key.src, sizeof(ctx->key.src), 0);
hdst = OvsJhashBytes((UINT32*) &ctx->key.dst, sizeof(ctx->key.dst), 0);
hash = hsrc ^ hdst; /* TO identify reverse traffic */
ctx->hash = OvsJhashBytes((uint32_t *) &ctx->key.dst + 1,
((uint32_t *) (&ctx->key + 1) -
(uint32_t *) (&ctx->key.dst + 1)),
hash);
}
/*
*----------------------------------------------------------------------------
* OvsProcessConntrackEntry
* Check the TCP flags and set the ct_state of the entry
*----------------------------------------------------------------------------
*/
static __inline POVS_CT_ENTRY
OvsProcessConntrackEntry(PNET_BUFFER_LIST curNbl,
const TCPHdr *tcp,
OvsConntrackKeyLookupCtx *ctx,
OvsFlowKey *key,
UINT16 zone,
BOOLEAN commit,
UINT64 currentTime)
{
POVS_CT_ENTRY entry = ctx->entry;
UINT32 state = 0;
/* If an entry was found, update the state based on TCP flags */
if (ctx->related) {
state |= OVS_CS_F_RELATED;
if (ctx->reply) {
state = OVS_CS_F_REPLY_DIR;
}
} else {
CT_UPDATE_RES result;
result = OvsConntrackUpdateTcpEntry(entry, tcp, curNbl,
ctx->reply, currentTime);
switch (result) {
case CT_UPDATE_VALID:
state |= OVS_CS_F_ESTABLISHED;
if (ctx->reply) {
state |= OVS_CS_F_REPLY_DIR;
}
break;
case CT_UPDATE_INVALID:
state |= OVS_CS_F_INVALID;
break;
case CT_UPDATE_NEW:
//Delete and update the Conntrack
OvsCtEntryDelete(ctx->entry);
ctx->entry = NULL;
entry = OvsCtEntryCreate(tcp, curNbl, ctx, key,
commit, currentTime);
break;
}
}
/* Copy mark and label from entry into flowKey. If actions specify
different mark and label, update the flowKey. */
OvsCtUpdateFlowKey(key, state, zone, entry->mark, &entry->labels);
return entry;
}
static __inline VOID
OvsConntrackSetMark(OvsFlowKey *key,
POVS_CT_ENTRY entry,
UINT32 value,
UINT32 mask)
{
UINT32 newMark;
newMark = value | (entry->mark & ~(mask));
if (entry->mark != newMark) {
entry->mark = newMark;
key->ct.mark = newMark;
}
}
static __inline void
OvsConntrackSetLabels(OvsFlowKey *key,
POVS_CT_ENTRY entry,
struct ovs_key_ct_labels *val,
struct ovs_key_ct_labels *mask)
{
ovs_u128 v, m, pktMdLabel;
memcpy(&v, val, sizeof v);
memcpy(&m, mask, sizeof m);
pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
NdisMoveMemory(&entry->labels, &pktMdLabel,
sizeof(struct ovs_key_ct_labels));
NdisMoveMemory(&key->ct.labels, &pktMdLabel,
sizeof(struct ovs_key_ct_labels));
}
static __inline NDIS_STATUS
OvsCtExecute_(PNET_BUFFER_LIST curNbl,
OvsFlowKey *key,
OVS_PACKET_HDR_INFO *layers,
BOOLEAN commit,
UINT16 zone,
MD_MARK *mark,
MD_LABELS *labels)
{
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
POVS_CT_ENTRY entry = NULL;
OvsConntrackKeyLookupCtx ctx = { 0 };
TCPHdr tcpStorage;
UINT64 currentTime;
LOCK_STATE_EX lockState;
const TCPHdr *tcp;
tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage);
NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
/* Retrieve the Conntrack Key related fields from packet */
OvsCtSetupLookupCtx(key, zone, &ctx);
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
/* Lookup Conntrack entries for a matching entry */
entry = OvsCtLookup(&ctx);
if (!entry) {
/* If no matching entry was found, create one and add New state */
entry = OvsCtEntryCreate(tcp, curNbl, &ctx,
key, commit, currentTime);
} else {
/* Process the entry and update CT flags */
entry = OvsProcessConntrackEntry(curNbl, tcp, &ctx, key,
zone, commit, currentTime);
}
if (entry && mark) {
OvsConntrackSetMark(key, entry, mark->value, mark->mask);
}
if (entry && labels) {
OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask);
}
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
return status;
}
/*
*---------------------------------------------------------------------------
* OvsExecuteConntrackAction
* Executes Conntrack actions XXX - Add more
*---------------------------------------------------------------------------
*/
NDIS_STATUS
OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
OVS_PACKET_HDR_INFO *layers,
OvsFlowKey *key,
const PNL_ATTR a)
{
PNL_ATTR ctAttr;
BOOLEAN commit = FALSE;
UINT16 zone = 0;
MD_MARK *mark = NULL;
MD_LABELS *labels = NULL;
NDIS_STATUS status;
status = OvsDetectCtPacket(key);
if (status != NDIS_STATUS_SUCCESS) {
return status;
}
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
if (ctAttr) {
zone = NlAttrGetU16(ctAttr);
}
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
if (ctAttr) {
commit = TRUE;
}
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
if (ctAttr) {
mark = NlAttrGet(ctAttr);
}
ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
if (ctAttr) {
labels = NlAttrGet(ctAttr);
}
status = OvsCtExecute_(curNbl, key, layers,
commit, zone, mark, labels);
return status;
}
/*
*----------------------------------------------------------------------------
* OvsConntrackEnrtyCleaner
* Runs periodically and cleans up the connection tracker
*----------------------------------------------------------------------------
*/
VOID
ovsConntrackEntryCleaner(PVOID data)
{
POVS_CT_THREAD_CTX context = (POVS_CT_THREAD_CTX)data;
PLIST_ENTRY link, next;
POVS_CT_ENTRY entry;
BOOLEAN success = TRUE;
while (success) {
LOCK_STATE_EX lockState;
NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
if (context->exit) {
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
break;
}
/* Set the timeout for the thread and cleanup */
UINT64 currentTime, threadSleepTimeout;
NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
threadSleepTimeout = currentTime + CT_CLEANUP_INTERVAL;
for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
LIST_FORALL_SAFE(&ovsConntrackTable[i], link, next) {
entry = CONTAINING_RECORD(link, OVS_CT_ENTRY, link);
if (entry->expiration < currentTime) {
OvsCtEntryDelete(entry);
}
}
}
NdisReleaseRWLock(ovsConntrackLockObj, &lockState);
KeWaitForSingleObject(&context->event, Executive, KernelMode,
FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
}
PsTerminateSystemThread(STATUS_SUCCESS);
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2015, 2016 VMware, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __OVS_CONNTRACK_H_
#define __OVS_CONNTRACK_H_ 1
#include "precomp.h"
#include "Flow.h"
struct ct_addr {
union {
ovs_be32 ipv4;
struct in6_addr ipv6;
uint32_t ipv4_aligned;
struct in6_addr ipv6_aligned;
};
};
struct ct_endpoint {
struct ct_addr addr;
ovs_be16 port;
UINT16 pad;
};
typedef enum CT_UPDATE_RES {
CT_UPDATE_INVALID,
CT_UPDATE_VALID,
CT_UPDATE_NEW,
} CT_UPDATE_RES;
/* Metadata mark for masked write to conntrack mark */
typedef struct MD_MARK {
UINT32 value;
UINT32 mask;
} MD_MARK;
/* Metadata label for masked write to conntrack label. */
typedef struct MD_LABELS {
struct ovs_key_ct_labels value;
struct ovs_key_ct_labels mask;
} MD_LABELS;
typedef struct _OVS_CT_KEY {
struct ct_endpoint src;
struct ct_endpoint dst;
UINT16 dl_type;
UINT8 nw_proto;
UINT16 zone;
} OVS_CT_KEY, *POVS_CT_KEY;
typedef struct OVS_CT_ENTRY {
OVS_CT_KEY key;
OVS_CT_KEY rev_key;
UINT64 expiration;
LIST_ENTRY link;
UINT32 mark;
struct ovs_key_ct_labels labels;
} OVS_CT_ENTRY, *POVS_CT_ENTRY;
typedef struct OvsConntrackKeyLookupCtx {
OVS_CT_KEY key;
POVS_CT_ENTRY entry;
UINT32 hash;
BOOLEAN reply;
BOOLEAN related;
} OvsConntrackKeyLookupCtx;
#define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
#define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
#define CT_ENTRY_TIMEOUT (2 * 600000000) // 2m
#define CT_CLEANUP_INTERVAL (2 * 600000000) // 2m
VOID OvsCleanupConntrack(VOID);
NTSTATUS OvsInitConntrack(POVS_SWITCH_CONTEXT context);
NDIS_STATUS OvsExecuteConntrackAction(PNET_BUFFER_LIST curNbl,
OVS_PACKET_HDR_INFO *layers,
OvsFlowKey *key,
const PNL_ATTR a);
BOOLEAN OvsConntrackValidateTcpPacket(const TCPHdr *tcp);
OVS_CT_ENTRY * OvsNewTcpConntrack(const TCPHdr *tcp,
PNET_BUFFER_LIST nbl,
UINT64 now);
enum CT_UPDATE_RES OvsConntrackUpdateTcpEntry(struct OVS_CT_ENTRY* conn_,
const TCPHdr *tcp,
PNET_BUFFER_LIST nbl,
BOOLEAN reply,
UINT64 now);
#endif /* __OVS_CONNTRACK_H_ */

View File

@ -40,6 +40,7 @@
#define OVS_DBG_NETLINK BIT32(20)
#define OVS_DBG_TUNFLT BIT32(21)
#define OVS_DBG_STT BIT32(22)
#define OVS_DBG_CONTRK BIT32(23)
#define OVS_DBG_RESERVED BIT32(31)
//Please add above OVS_DBG_RESERVED.

View File

@ -167,6 +167,13 @@ typedef __declspec(align(8)) struct OvsFlowKey {
};
UINT32 recircId; /* Recirculation ID. */
UINT32 dpHash; /* Datapath calculated hash value. */
struct {
/* Connection tracking fields. */
UINT16 zone;
UINT32 mark;
UINT32 state;
struct ovs_key_ct_labels labels;
} ct; /* Connection Tracking Flags */
} OvsFlowKey;
#define OVS_WIN_TUNNEL_KEY_SIZE (sizeof (OvsIPv4TunnelKey))

View File

@ -172,7 +172,17 @@ const NL_POLICY nlFlowKeyPolicy[] = {
.maxLen = 4, .optional = TRUE},
[OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
.maxLen = 4, .optional = TRUE},
[OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE}
[OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE},
[OVS_KEY_ATTR_CT_STATE] = {.type = NL_A_UNSPEC, .minLen = 4,
.maxLen = 4, .optional = TRUE},
[OVS_KEY_ATTR_CT_ZONE] = {.type = NL_A_UNSPEC, .minLen = 2,
.maxLen = 2, .optional = TRUE},
[OVS_KEY_ATTR_CT_MARK] = {.type = NL_A_UNSPEC, .minLen = 4,
.maxLen = 4, .optional = TRUE},
[OVS_KEY_ATTR_CT_LABELS] = {.type = NL_A_UNSPEC,
.minLen = sizeof(struct ovs_key_ct_labels),
.maxLen = sizeof(struct ovs_key_ct_labels),
.optional = TRUE}
};
const UINT32 nlFlowKeyPolicyLen = ARRAY_SIZE(nlFlowKeyPolicy);
@ -229,7 +239,8 @@ const NL_POLICY nlFlowActionPolicy[] = {
.maxLen = sizeof(struct ovs_action_hash),
.optional = TRUE},
[OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
[OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE}
[OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE},
[OVS_ACTION_ATTR_CT] = {.type = NL_A_VAR_LEN, .optional = TRUE}
};
/*
@ -850,6 +861,28 @@ MapFlowKeyToNlKey(PNL_BUFFER nlBuf,
goto done;
}
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_STATE,
flowKey->ct.state)) {
rc = STATUS_UNSUCCESSFUL;
goto done;
}
if (!NlMsgPutTailU16(nlBuf, OVS_KEY_ATTR_CT_ZONE,
flowKey->ct.zone)) {
rc = STATUS_UNSUCCESSFUL;
goto done;
}
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_CT_MARK,
flowKey->ct.mark)) {
rc = STATUS_UNSUCCESSFUL;
goto done;
}
if (!NlMsgPutTailUnspec(nlBuf, OVS_KEY_ATTR_CT_LABELS,
(PCHAR)(&flowKey->ct.labels),
sizeof(struct ovs_key_ct_labels))) {
rc = STATUS_UNSUCCESSFUL;
goto done;
}
if (flowKey->dpHash) {
if (!NlMsgPutTailU32(nlBuf, OVS_KEY_ATTR_DP_HASH,
flowKey->dpHash)) {
@ -1386,6 +1419,24 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
destKey->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
}
if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
destKey->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
destKey->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
destKey->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
const struct ovs_key_ct_labels *ct_labels;
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
RtlCopyMemory(&destKey->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
}
/* ===== L2 headers ===== */
destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
@ -1774,6 +1825,24 @@ OvsGetFlowMetadata(OvsFlowKey *key,
key->dpHash = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_DP_HASH]);
}
if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
key->ct.state = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
key->ct.zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
key->ct.mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
}
if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
const struct ovs_key_ct_labels *ct_labels;
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
RtlCopyMemory(&key->ct.labels, ct_labels, sizeof(struct ovs_key_ct_labels));
}
return status;
}
@ -2059,6 +2128,11 @@ FlowEqual(OvsFlow *srcFlow,
srcFlow->key.l2.val == dstKey->l2.val &&
srcFlow->key.recircId == dstKey->recircId &&
srcFlow->key.dpHash == dstKey->dpHash &&
srcFlow->key.ct.state == dstKey->ct.state &&
srcFlow->key.ct.zone == dstKey->ct.zone &&
srcFlow->key.ct.mark == dstKey->ct.mark &&
!memcmp(&srcFlow->key.ct.labels, &dstKey->ct.labels,
sizeof(struct ovs_key_ct_labels)) &&
FlowMemoryEqual((UINT64 *)((UINT8 *)&srcFlow->key + offset),
(UINT64 *) dstStart,
size));
@ -2156,6 +2230,21 @@ OvsLookupFlow(OVS_DATAPATH *datapath,
if (key->dpHash) {
*hash = OvsJhashWords((UINT32*)hash, 1, key->dpHash);
}
if (key->ct.state) {
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.state);
}
if (key->ct.zone) {
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone);
}
if (key->ct.mark) {
*hash = OvsJhashWords((UINT32*)hash, 1, key->ct.zone);
}
if (key->ct.labels.ct_labels) {
UINT32 lblHash = OvsJhashBytes(&key->ct.labels,
sizeof(struct ovs_key_ct_labels),
0);
*hash = OvsJhashWords((UINT32*)hash, 1, lblHash);
}
}
head = &datapath->flowTable[HASH_BUCKET(*hash)];
@ -2322,6 +2411,12 @@ ReportFlowInfo(OvsFlow *flow,
info->key.recircId = flow->key.recircId;
info->key.dpHash = flow->key.dpHash;
info->key.ct.state = flow->key.ct.state;
info->key.ct.zone = flow->key.ct.zone;
info->key.ct.mark = flow->key.ct.mark;
NdisMoveMemory(&info->key.ct.labels,
&flow->key.ct.labels,
sizeof(struct ovs_key_ct_labels));
return status;
}
@ -2578,6 +2673,10 @@ OvsFlowKeyAttrSize(void)
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_SKB_MARK */
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_DP_HASH */
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_CT_STATE */
+ NlAttrTotalSize(2) /* OVS_KEY_ATTR_CT_ZONE */
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_CT_MARK */
+ NlAttrTotalSize(16) /* OVS_KEY_ATTR_CT_LABELS */
+ NlAttrTotalSize(12) /* OVS_KEY_ATTR_ETHERNET */
+ NlAttrTotalSize(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ NlAttrTotalSize(4) /* OVS_KEY_ATTR_VLAN */
@ -2657,6 +2756,31 @@ OvsProbeSupportedFeature(POVS_MESSAGE msgIn,
OVS_LOG_ERROR("Invalid recirculation ID.");
status = STATUS_INVALID_PARAMETER;
}
} else if (keyAttrs[OVS_KEY_ATTR_CT_STATE]) {
UINT32 state = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_STATE]);
if (state & OVS_CS_F_DST_NAT || state & OVS_CS_F_SRC_NAT) {
status = STATUS_INVALID_PARAMETER;
OVS_LOG_ERROR("Contrack NAT is not supported:%d", state);
}
} else if (keyAttrs[OVS_KEY_ATTR_CT_ZONE]) {
UINT16 zone = (NlAttrGetU16(keyAttrs[OVS_KEY_ATTR_CT_ZONE]));
if (!zone) {
OVS_LOG_ERROR("Invalid zone specified.");
status = STATUS_INVALID_PARAMETER;
}
} else if (keyAttrs[OVS_KEY_ATTR_CT_MARK]) {
UINT32 mark = (NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_CT_MARK]));
if (!mark) {
OVS_LOG_ERROR("Invalid ct mark specified.");
status = STATUS_INVALID_PARAMETER;
}
} else if (keyAttrs[OVS_KEY_ATTR_CT_LABELS]) {
const struct ovs_key_ct_labels *ct_labels;
ct_labels = NlAttrGet(keyAttrs[OVS_KEY_ATTR_CT_LABELS]);
if (!ct_labels->ct_labels) {
OVS_LOG_ERROR("Invalid ct label specified.");
status = STATUS_INVALID_PARAMETER;
}
} else {
OVS_LOG_ERROR("Feature not supported.");
status = STATUS_INVALID_PARAMETER;

View File

@ -20,7 +20,7 @@
*/
#include "precomp.h"
#include "Conntrack.h"
#include "Switch.h"
#include "Vport.h"
#include "Event.h"
@ -218,6 +218,13 @@ OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
goto create_switch_done;
}
status = OvsInitConntrack(switchContext);
if (status != STATUS_SUCCESS) {
OvsUninitSwitchContext(switchContext);
OVS_LOG_ERROR("Exit: Failed to initialize Connection tracking");
goto create_switch_done;
}
*switchContextOut = switchContext;
create_switch_done:
@ -249,6 +256,7 @@ OvsExtDetach(NDIS_HANDLE filterModuleContext)
OvsDeleteSwitch(switchContext);
OvsCleanupIpHelper();
OvsCleanupSttDefragmentation();
OvsCleanupConntrack();
/* This completes the cleanup, and a new attach can be handled now. */
OVS_LOG_TRACE("Exit: OvsDetach Successfully");

View File

@ -28,6 +28,12 @@ typedef uint64 __u64, __be64;
typedef uint32 __u32, __be32;
typedef uint16 __u16, __be16;
typedef uint8 __u8;
typedef union ovs_u128 {
uint32_t u32[4];
struct {
uint64_t lo, hi;
} u64;
} ovs_u128;
/* Defines the userspace specific data types for file
* included within kernel only. */

View File

@ -37,6 +37,7 @@
#define OVS_GRE_POOL_TAG 'GSVO'
#define OVS_TUNFLT_POOL_TAG 'WSVO'
#define OVS_RECIRC_POOL_TAG 'CSVO'
#define OVS_CT_POOL_TAG 'CTVO'
VOID *OvsAllocateMemory(size_t size);
VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag);
@ -68,7 +69,7 @@ VOID OvsFreeAlignedMemory(VOID *ptr);
VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
#define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
#define ARRAY_SIZE(_x) ((sizeof(_x))/sizeof (_x)[0])
#define OVS_SWITCH_PORT_ID_INVALID (NDIS_SWITCH_PORT_ID)(-1)

View File

@ -74,6 +74,7 @@
<ClInclude Include="Actions.h" />
<ClInclude Include="Atomic.h" />
<ClInclude Include="BufferMgmt.h" />
<ClInclude Include="Conntrack.h" />
<ClInclude Include="Datapath.h" />
<ClInclude Include="Debug.h" />
<ClInclude Include="DpInternal.h" />
@ -175,6 +176,8 @@
<ItemGroup>
<ClCompile Include="Actions.c" />
<ClCompile Include="BufferMgmt.c" />
<ClCompile Include="Conntrack-tcp.c" />
<ClCompile Include="Conntrack.c" />
<ClCompile Include="Debug.c" />
<ClCompile Include="Driver.c" />
<ClCompile Include="Event.c" />

1
debian/copyright.in vendored
View File

@ -89,6 +89,7 @@ License:
lib/getopt_long.c
include/windows/getopt.h
datapath-windows/ovsext/Conntrack-tcp.c
* The following files are licensed under the 3-clause BSD-license