From 433637881ca5d1b71eed693afef162df8e529bff Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 7 May 2015 10:17:26 -0700 Subject: [PATCH 001/146] datapath: define compat __skb_gso_segment() OVS correctly defines skb_gso_segment() to handle MPLS and VLAN segmentation. But OVS also uses __skb_gso_segment() in some cases. The following patch defines a compat __skb_gso_segment() to handle all segmentation cases. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross --- datapath/linux/compat/gso.c | 28 +++++++++---- .../linux/compat/include/linux/netdevice.h | 41 ++++++++++++------- datapath/linux/compat/netdevice.c | 19 ++++++--- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c index 552e7485c..2c19b5890 100644 --- a/datapath/linux/compat/gso.c +++ b/datapath/linux/compat/gso.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets"); #define vlan_tso true #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) +#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION static bool dev_supports_vlan_tx(struct net_device *dev) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) @@ -66,16 +66,16 @@ static bool dev_supports_vlan_tx(struct net_device *dev) } /* Strictly this is not needed and will be optimised out - * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0). + * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0). * It is here to make things explicit should the compatibility * code be extended in some way prior extending its life-span - * beyond v3.16. + * beyond v3.19. */ static bool supports_mpls_gso(void) { /* MPLS GSO was introduced in v3.11, however it was not correctly - * activated using mpls_features until v3.16. */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) + * activated using mpls_features until v3.19. 
*/ +#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION return true; #else return false; @@ -120,17 +120,17 @@ int rpl_dev_queue_xmit(struct sk_buff *skb) /* As of v3.11 the kernel provides an mpls_features field in * struct net_device which allows devices to advertise which * features its supports for MPLS. This value defaults to - * NETIF_F_SG and as of v3.16. + * NETIF_F_SG and as of v3.19. * * This compatibility code is intended for kernels older - * than v3.16 that do not support MPLS GSO and do not + * than v3.19 that do not support MPLS GSO and do not * use mpls_features. Thus this code uses NETIF_F_SG * directly in place of mpls_features. */ if (mpls) features &= NETIF_F_SG; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(skb->dev, skb, features)) { struct sk_buff *nskb; nskb = skb_gso_segment(skb, features); @@ -168,7 +168,7 @@ drop: return err; } EXPORT_SYMBOL_GPL(rpl_dev_queue_xmit); -#endif /* 3.16 */ +#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */ #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) static __be16 __skb_network_protocol(struct sk_buff *skb) @@ -219,7 +219,17 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, * make copy of it to restore it back. */ memcpy(cb, skb->cb, sizeof(cb)); + /* We are handling offloads by segmenting l3 packet, so + * no need to call OVS compat segmentation function. 
*/ + +#ifdef HAVE___SKB_GSO_SEGMENT +#undef __skb_gso_segment segs = __skb_gso_segment(skb, 0, tx_path); +#else +#undef skb_gso_segment + segs = skb_gso_segment(skb, 0); +#endif + if (!segs || IS_ERR(segs)) goto free; diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h index 38315c251..3deb93dbb 100644 --- a/datapath/linux/compat/include/linux/netdevice.h +++ b/datapath/linux/compat/include/linux/netdevice.h @@ -88,31 +88,42 @@ static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifind typedef u32 netdev_features_t; #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) +#define OVS_USE_COMPAT_GSO_SEGMENTATION +#endif + +#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION +/* define compat version to handle MPLS segmentation offload. */ +#define __skb_gso_segment rpl__skb_gso_segment +struct sk_buff *rpl__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, + bool tx_path); + #define skb_gso_segment rpl_skb_gso_segment -struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, - netdev_features_t features); +static inline +struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features) +{ + return rpl__skb_gso_segment(skb, features, true); +} #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) #define netif_skb_features rpl_netif_skb_features netdev_features_t rpl_netif_skb_features(struct sk_buff *skb); - -#define netif_needs_gso rpl_netif_needs_gso -static inline int rpl_netif_needs_gso(struct sk_buff *skb, int features) -{ - return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); -} #endif -#ifndef HAVE___SKB_GSO_SEGMENT -static inline struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, - bool tx_path) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +static inline int rpl_netif_needs_gso(struct net_device *dev, + struct sk_buff 
*skb, int features) { - return skb_gso_segment(skb, features); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) + return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); +#else + return netif_needs_gso(skb, features); +#endif } +#define netif_needs_gso rpl_netif_needs_gso #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) diff --git a/datapath/linux/compat/netdevice.c b/datapath/linux/compat/netdevice.c index 7bb8f7793..483d665d8 100644 --- a/datapath/linux/compat/netdevice.c +++ b/datapath/linux/compat/netdevice.c @@ -75,9 +75,10 @@ netdev_features_t rpl_netif_skb_features(struct sk_buff *skb) EXPORT_SYMBOL_GPL(rpl_netif_skb_features); #endif /* kernel version < 2.6.38 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) -struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION +struct sk_buff *rpl__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, + bool tx_path) { int vlan_depth = ETH_HLEN; __be16 type = skb->protocol; @@ -99,14 +100,20 @@ struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, type = ovs_skb_get_inner_protocol(skb); /* this hack needed to get regular skb_gso_segment() */ -#undef skb_gso_segment skb_proto = skb->protocol; skb->protocol = type; +#ifdef HAVE___SKB_GSO_SEGMENT +#undef __skb_gso_segment + skb_gso = __skb_gso_segment(skb, features, tx_path); +#else +#undef skb_gso_segment skb_gso = skb_gso_segment(skb, features); +#endif + skb->protocol = skb_proto; return skb_gso; } -EXPORT_SYMBOL_GPL(rpl_skb_gso_segment); +EXPORT_SYMBOL_GPL(rpl__skb_gso_segment); -#endif /* kernel version < 3.16.0 */ +#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */ From d0000b68f948536045cbf528198148020e1362eb Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Fri, 8 May 2015 06:16:51 +0000 Subject: [PATCH 002/146] datapath-windows: Added new function for native forwarded traffic Signed-off-by: Sorin Vinturis Acked-by: Nithin 
Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/PacketIO.c | 87 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 1f5c8b0f6..902e9aa4f 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -176,6 +176,29 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, sendCompleteFlags); } +static VOID +OvsAppendNativeForwardedPacket(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + PNET_BUFFER_LIST *nativeNbls, + ULONG flags, + BOOLEAN isRecv) +{ + POVS_BUFFER_CONTEXT ctx = { 0 }; + NDIS_STRING filterReason; + + *nativeNbls = curNbl; + nativeNbls = &(curNbl->Next); + + ctx = OvsInitExternalNBLContext(switchContext, curNbl, isRecv); + if (ctx == NULL) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate native NBL context."); + + OvsStartNBLIngressError(switchContext, curNbl, flags, &filterReason, + NDIS_STATUS_RESOURCES); + } +} + static VOID OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST netBufferLists, @@ -193,9 +216,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, LIST_ENTRY missedPackets; UINT32 num = 0; OvsCompletionList completionList; - PNET_BUFFER_LIST ovsForwardedNbls = NULL; PNET_BUFFER_LIST nativeForwardedNbls = NULL; - PNET_BUFFER_LIST *nextOvsForwardNbl = &ovsForwardedNbls; PNET_BUFFER_LIST *nextNativeForwardedNbl = &nativeForwardedNbls; dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)? @@ -206,48 +227,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, InitializeListHead(&missedPackets); OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags); -#if (NDIS_SUPPORT_NDIS640) - /* - * Split NBL list into NBLs to be forwarded by us, and those that require - * native forwarding. 
- */ for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { - nextNbl = curNbl->Next; - curNbl->Next = NULL; - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); - - if (fwdDetail->NativeForwardingRequired) { - POVS_BUFFER_CONTEXT ctx; - - *nextNativeForwardedNbl = curNbl; - nextNativeForwardedNbl = &(curNbl->Next); - - ctx = OvsInitExternalNBLContext(switchContext, curNbl, - sourcePort == switchContext->virtualExternalPortId); - if (ctx == NULL) { - RtlInitUnicodeString(&filterReason, - L"Cannot allocate native NBL context."); - - OvsStartNBLIngressError(switchContext, curNbl, - sendCompleteFlags, &filterReason, - NDIS_STATUS_RESOURCES); - - continue; - } - } else { - *nextOvsForwardNbl = curNbl; - nextOvsForwardNbl = &(curNbl->Next); - } - } -#else - UNREFERENCED_PARAMETER(nativeForwardedNbls); - UNREFERENCED_PARAMETER(nextNativeForwardedNbl); - UNREFERENCED_PARAMETER(nextOvsForwardNbl); - - ovsForwardedNbls = netBufferLists; -#endif - - for (curNbl = ovsForwardedNbls; curNbl != NULL; curNbl = nextNbl) { POVS_VPORT_ENTRY vport; UINT32 portNo; OVS_DATAPATH *datapath = &switchContext->datapath; @@ -259,6 +239,23 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, nextNbl = curNbl->Next; curNbl->Next = NULL; + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + sourcePort = fwdDetail->SourcePortId; + sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; + +#if (NDIS_SUPPORT_NDIS640) + if (fwdDetail->NativeForwardingRequired) { + /* Add current NBL to those that require native forwarding. */ + OvsAppendNativeForwardedPacket( + switchContext, + curNbl, + nextNativeForwardedNbl, + sendCompleteFlags, + sourcePort == switchContext->virtualExternalPortId); + continue; + } +#endif + /* Ethernet Header is a guaranteed safe access. 
*/ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); if (curNb->Next != NULL) { @@ -273,10 +270,6 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, POVS_BUFFER_CONTEXT ctx; OvsFlow *flow; - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); - sourcePort = fwdDetail->SourcePortId; - sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; - /* Take the DispatchLock so none of the VPORTs disconnect while * we are setting destination ports. * From caa63627b5d885f1008142cf75fdfa97314df0b5 Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Fri, 8 May 2015 06:17:43 +0000 Subject: [PATCH 003/146] datapath-windows: Correctly link newly allocated NBL OvsPartialCopyToMultipleNBLs function failed to correctly link the newly created NBL with single NB to the multiple NBLs list. Signed-off-by: Sorin Vinturis Co-authored-by: Alin Gabriel Serdean Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/BufferMgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c index 572b2988a..5adbb2518 100644 --- a/datapath-windows/ovsext/BufferMgmt.c +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -863,7 +863,7 @@ OvsPartialCopyToMultipleNBLs(PVOID ovsContext, if (prevNbl == NULL) { firstNbl = newNbl; } else { - NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl; + NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl; NET_BUFFER_NEXT_NB(prevNb) = nb; } prevNbl = newNbl; From 37ec7b365631bf4a8951b7567e7fb94aba80066a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 7 May 2015 13:36:43 -0700 Subject: [PATCH 004/146] travis: Fix clang build for DPDK-2.0. -Wno-cast-align is a CFLAG, not a configure option. 
Signed-off-by: Joe Stringer Acked-by: Daniele Di Proietto --- .travis/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis/build.sh b/.travis/build.sh index c7796e13b..6dfc9fe14 100755 --- a/.travis/build.sh +++ b/.travis/build.sh @@ -74,7 +74,7 @@ if [ "$DPDK" ]; then install_dpdk $DPDK_VER if [ "$CC" = "clang" ]; then # Disregard cast alignment errors until DPDK is fixed - EXTRA_OPTS="$EXTRA_OPTS -Wno-cast-align" + CFLAGS="$CFLAGS -Wno-cast-align" fi EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=./dpdk-$DPDK_VER/build" elif [ "$CC" != "clang" ]; then From 3afcde4381bd0fdcb9eb711bf49b14a48d64e944 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 9 Apr 2015 18:40:51 -0700 Subject: [PATCH 005/146] datapath: Add support for 4.0 kernel. Signed-off-by: Joe Stringer Acked-by: Jesse Gross --- .travis.yml | 1 + .travis/build.sh | 4 +++- FAQ.md | 2 +- NEWS | 2 +- acinclude.m4 | 10 ++++++---- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index ffb9744fe..36d95bdf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ env: - TESTSUITE=1 KERNEL=3.18.1 - TESTSUITE=1 OPTS="--enable-shared" - BUILD_ENV="-m32" OPTS="--disable-ssl" + - KERNEL=4.0.2 - KERNEL=3.17.7 DPDK=1 - KERNEL=3.17.7 DPDK=1 OPTS="--enable-shared" - KERNEL=3.17.7 diff --git a/.travis/build.sh b/.travis/build.sh index 6dfc9fe14..e90f4d08b 100755 --- a/.travis/build.sh +++ b/.travis/build.sh @@ -9,7 +9,9 @@ EXTRA_OPTS="" function install_kernel() { - if [[ "$1" =~ ^3.* ]]; then + if [[ "$1" =~ ^4.* ]]; then + PREFIX="v4.x" + elif [[ "$1" =~ ^3.* ]]; then PREFIX="v3.x" else PREFIX="v2.6/longterm/v2.6.32" diff --git a/FAQ.md b/FAQ.md index d22867889..045eed1d1 100644 --- a/FAQ.md +++ b/FAQ.md @@ -156,7 +156,7 @@ A: The following table lists the Linux kernel versions against which the | 2.0.x | 2.6.32 to 3.10 | 2.1.x | 2.6.32 to 3.11 | 2.3.x | 2.6.32 to 3.14 -| 2.4.x | 2.6.32 to 3.19 +| 2.4.x | 2.6.32 to 4.0 Open vSwitch userspace should also 
work with the Linux kernel module built into Linux 3.3 and later. diff --git a/NEWS b/NEWS index 882a3814c..a4806077d 100644 --- a/NEWS +++ b/NEWS @@ -65,7 +65,7 @@ Post-v2.3.0 - Added support for DPDK Tunneling. VXLAN, GRE, and Geneve are supported protocols. This is generic tunneling mechanism for userspace datapath. - Support for multicast snooping (IGMPv1 and IGMPv2) - - Support for Linux kernels up to 3.19.x + - Support for Linux kernels up to 4.0.x - The documentation now use the term 'destination' to mean one of syslog, console or file for vlog logging instead of the previously used term 'facility'. diff --git a/acinclude.m4 b/acinclude.m4 index e9d0ed968..aab7df8c9 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -133,12 +133,14 @@ AC_DEFUN([OVS_CHECK_LINUX], [ fi AC_MSG_RESULT([$kversion]) - if test "$version" -ge 3; then - if test "$version" = 3 && test "$patchlevel" -le 19; then - : # Linux 3.x + if test "$version" -ge 4; then + if test "$version" = 4 && test "$patchlevel" -le 0; then + : # Linux 4.x else - AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 3.19.x is not supported (please refer to the FAQ for advice)]) + AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 4.0.x is not supported (please refer to the FAQ for advice)]) fi + elif test "$version" = 3; then + : # Linux 3.x else if test "$version" -le 1 || test "$patchlevel" -le 5 || test "$sublevel" -le 31; then AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version 2.6.32 or later is required]) From 36be51c5b09c97a62c342e8ff0bb2d1a33ea2b68 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 8 May 2015 09:15:43 -0700 Subject: [PATCH 006/146] ofp-util: Use OFPGMFC_OUT_OF_BUCKETS for indirect groups with !=1 buckets. 
OpenFlow 1.3 says: If a switch cannot add the incoming group entry due to restrictions (hardware or otherwise) limiting the number of group buckets, it must refuse to add the group entry and must send an ofp_error_msg with OFPET_GROUP_MOD_FAILED type and OFPGMFC_OUT_OF_BUCKETS code. This indicates that OFPGMFC_OUT_OF_BUCKETS is appropriate for an indirect group with the wrong number of buckets, but OVS was using a different error. This fixes the problem. ONF-JIRA: EXT-546 Reported-by: Mrinmoy Das Signed-off-by: Ben Pfaff Acked-by: Justin Pettit --- AUTHORS | 1 + lib/ofp-util.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 53fd835e4..891cfe976 100644 --- a/AUTHORS +++ b/AUTHORS @@ -303,6 +303,7 @@ Mike Bursell mike.bursell@citrix.com Mike Kruze mkruze@nicira.com Min Chen ustcer.tonychan@gmail.com Mikael Doverhag mdoverhag@nicira.com +Mrinmoy Das mrdas@ixiacom.com Nagi Reddy Jonnala njonnala@Brocade.com Niels van Adrichem N.L.M.vanAdrichem@tudelft.nl Niklas Andersson nandersson@nicira.com diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 60cc67432..6366919a4 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -8375,7 +8375,7 @@ ofputil_decode_group_mod(const struct ofp_header *oh, switch (gm->type) { case OFPGT11_INDIRECT: if (!list_is_singleton(&gm->buckets)) { - return OFPERR_OFPGMFC_INVALID_GROUP; + return OFPERR_OFPGMFC_OUT_OF_BUCKETS; } break; case OFPGT11_ALL: From c875bb948df059855c18b29bdfbfbfcb986e607a Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Thu, 26 Mar 2015 12:52:42 -0700 Subject: [PATCH 007/146] utilities: Add new pipeline generator script. When doing OVS performance testing, it's important to have both realistic traffic traces and OpenFlow pipelines on which to evaluate prospective changes. As a first step in this direction, this patch adds a python script which generates an OpenFlow pipeline intended to simulate typical network virtualization workloads. 
Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- utilities/automake.mk | 5 +- utilities/ovs-pipegen.py | 122 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) create mode 100755 utilities/ovs-pipegen.py diff --git a/utilities/automake.mk b/utilities/automake.mk index a06630f1f..6083b4b68 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -34,9 +34,8 @@ utilities/ovs-lib: $(top_builddir)/config.status docs += utilities/ovs-command-bashcomp.INSTALL.md EXTRA_DIST += \ - utilities/ovs-check-dead-ifs.in \ utilities/ovs-appctl-bashcomp.bash \ - utilities/ovs-vsctl-bashcomp.bash \ + utilities/ovs-check-dead-ifs.in \ utilities/ovs-command-bashcomp.INSTALL.md \ utilities/ovs-ctl.in \ utilities/ovs-dev.py \ @@ -46,11 +45,13 @@ EXTRA_DIST += \ utilities/ovs-lib.in \ utilities/ovs-parse-backtrace.in \ utilities/ovs-pcap.in \ + utilities/ovs-pipegen.py \ utilities/ovs-pki.in \ utilities/ovs-save \ utilities/ovs-tcpundump.in \ utilities/ovs-test.in \ utilities/ovs-vlan-test.in \ + utilities/ovs-vsctl-bashcomp.bash \ utilities/qemu-wrap.py MAN_ROOTS += \ utilities/ovs-appctl.8.in \ diff --git a/utilities/ovs-pipegen.py b/utilities/ovs-pipegen.py new file mode 100755 index 000000000..95647d1e2 --- /dev/null +++ b/utilities/ovs-pipegen.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +# Copyright (c) 2013, 2014, 2015 Nicira, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import random +import sys +import textwrap + +def flow_str(stage, match, action, priority=32768): + mtd_match = "metadata=%d" % stage + if match: + mtd_match += "," + match + + return "priority=%d %s,actions=%s" % (priority, mtd_match, action) + + +def resubmit(nxt): + return "load:%d->OXM_OF_METADATA[],resubmit(,0)" % nxt + + +def rand_ip_mask(): + return ("%d.%d.%d.%d" % (random.randint(0, 255), random.randint(0, 255), + random.randint(0, 255), random.randint(0, 255)), + random.choice([8, 16, 24, 32])) + + +def rand_bool(): + return bool(random.randint(0, 1)) + + +def l2(stage, action): + mac = ["%x" % random.randint(0, 2 ** 8 - 1) for x in range(6)] + mac = [x.zfill(2) for x in mac] + mac = ":".join(mac) + return flow_str(stage, "dl_dst=%s" % mac, action) + + +def l3(stage, action): + ip, mask = rand_ip_mask() + return flow_str(stage, "ip,ip_dst=%s/%d" % (ip, mask), action, + priority=mask) + + +def l4(stage, action): + match = "tcp" + + if rand_bool(): + match += ",ip_src=%s/%d" % rand_ip_mask() + + if rand_bool(): + match += ",ip_dst=%s/%d" % rand_ip_mask() + + src_dst = "tp_src" if rand_bool() else "tp_dst" + match += ",%s=%d" % (src_dst, random.randint(1024, 2**16 - 1)) + return flow_str(stage, match, action) + + +def pipeline(size): + pipeline = [l2, l3, l4, l2] + + flows = [] + for stage in xrange(len(pipeline)): + action = resubmit(stage + 1) + flows += [pipeline[stage](stage, action) for _ in xrange(size)] + flows.append(flow_str(stage, "", action, priority=1)) + + flows.append(flow_str(len(pipeline), "", "in_port")) + + for f in flows: + print f + + +def main(): + description = textwrap.dedent( + """ + Generate a test OpenFlow pipeline. + + Open vSwitch relies heavily on flow caching to get good performance for + packet processing. While on average, this produces good results, + performance is heavily depedent on the slow path OpenFlow tables, and + how they're translated into datapath megaflows. 
For this reason, when + doing performance testing it's important to run with "realistic" + OpenFlow tables to ensure results will stand up in the real world. + + This script generates a simple OpenFlow pipeline intended to simulate + realistic network virtualization workloads. All traffic received is + run through a series of OpenFlow tables designed to simulate a logical + switch, router, and firewall, before forwarded back on the in_port. + """) + + epilog = textwrap.dedent( + """ + typical usage: + ovs-ofctl del-flows bridge \\ + && %s | ovs-ofctl add-flows bridge - \\ + && ovs-ofctl dump-flows bridge + """ % sys.argv[0]) + + parser = argparse.ArgumentParser(description=description, epilog=epilog, + formatter_class=\ + argparse.RawDescriptionHelpFormatter) + parser.add_argument("--size", dest="size", default=1000, + help="Size (rules) of each OpenFlow table.") + args=parser.parse_args() + + pipeline(int(args.size)) + + +if __name__ == "__main__": + main() From 4345e1b5bf563ebfd7a7dcf489eac0fdf68135cf Mon Sep 17 00:00:00 2001 From: Kevin Traynor Date: Mon, 11 May 2015 21:58:12 -0700 Subject: [PATCH 008/146] netdev-dpdk: Change phy rx burst size. Change phy rx burst size from 192 to 32. This aligns the burst size with the other dpdk interfaces and significantly improves performance when forwarding to dpdk vhost ports. 
Signed-off-by: Kevin Traynor Acked-by: Pravin B Shelar --- lib/netdev-dpdk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 5af15d421..cbb266deb 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -144,7 +144,6 @@ static const struct rte_eth_txconf tx_conf = { .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS|ETH_TXQ_FLAGS_NOOFFLOADS, }; -enum { MAX_RX_QUEUE_LEN = 192 }; enum { MAX_TX_QUEUE_LEN = 384 }; enum { DPDK_RING_SIZE = 256 }; BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE)); @@ -885,8 +884,8 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, nb_rx = rte_eth_rx_burst(rx->port_id, rxq_->queue_id, (struct rte_mbuf **) packets, - MIN((int)NETDEV_MAX_RX_BATCH, - (int)MAX_RX_QUEUE_LEN)); + MIN((int) NETDEV_MAX_RX_BATCH, + (int) MAX_PKT_BURST)); if (!nb_rx) { return EAGAIN; } From 95e9881f843896751a76481cfe7869e2c0c1270b Mon Sep 17 00:00:00 2001 From: Kevin Traynor Date: Mon, 11 May 2015 21:58:14 -0700 Subject: [PATCH 009/146] netdev-dpdk: Add vhost enqueue retries. The max allowed burst size for a single vhost enqueue is 32. This code facilitates trying to send greater than the burst size of packets to the vhost interface by adding a retry loop and calling vhost enqueue multiple times. As this could potentially block, a timeout is added. Signed-off-by: Kevin Traynor Signed-off-by: Pravin B Shelar --- lib/netdev-dpdk.c | 54 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index cbb266deb..505ab751e 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -104,6 +104,11 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) /* Character device cuse_dev_name. */ char *cuse_dev_name = NULL; +/* + * Maximum amount of time in micro seconds to try and enqueue to vhost. 
+ */ +#define VHOST_ENQ_RETRY_USECS 100 + static const struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, @@ -901,7 +906,9 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, { struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev); struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev); - int tx_pkts, i; + struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts; + unsigned int total_pkts = cnt; + uint64_t start = 0; if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { ovs_mutex_lock(&vhost_dev->mutex); @@ -912,16 +919,51 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, /* There is vHost TX single queue, So we need to lock it for TX. */ rte_spinlock_lock(&vhost_dev->txq_lock); - tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ, - (struct rte_mbuf **)pkts, cnt); - vhost_dev->stats.tx_packets += tx_pkts; - vhost_dev->stats.tx_dropped += (cnt - tx_pkts); + do { + unsigned int tx_pkts; + + tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ, + cur_pkts, cnt); + if (OVS_LIKELY(tx_pkts)) { + /* Packets have been sent.*/ + cnt -= tx_pkts; + /* Prepare for possible next iteration.*/ + cur_pkts = &cur_pkts[tx_pkts]; + } else { + uint64_t timeout = VHOST_ENQ_RETRY_USECS * rte_get_timer_hz() / 1E6; + unsigned int expired = 0; + + if (!start) { + start = rte_get_timer_cycles(); + } + + /* + * Unable to enqueue packets to vhost interface. + * Check available entries before retrying. + */ + while (!rte_vring_available_entries(virtio_dev, VIRTIO_RXQ)) { + if (OVS_UNLIKELY((rte_get_timer_cycles() - start) > timeout)) { + expired = 1; + break; + } + } + if (expired) { + /* break out of main loop. 
*/ + break; + } + } + } while (cnt); + + vhost_dev->stats.tx_packets += (total_pkts - cnt); + vhost_dev->stats.tx_dropped += cnt; rte_spinlock_unlock(&vhost_dev->txq_lock); out: if (may_steal) { - for (i = 0; i < cnt; i++) { + int i; + + for (i = 0; i < total_pkts; i++) { dp_packet_delete(pkts[i]); } } From 9899125aaae9f0634f43307cc0ff72f5afb287bb Mon Sep 17 00:00:00 2001 From: Oleg Strikov Date: Fri, 8 May 2015 12:05:13 -0700 Subject: [PATCH 010/146] INSTALL.DPDK: Notes on running ovs-vswitchd/dpdk inside a VM Additional configuration is required if you want to run ovs-vswitchd with DPDK backend inside a QEMU virtual machine. This happens because, by default, virtio NIC provided to the guest doesn't support multiple TX queues which are required by ovs-vswitchd/dpdk. This commit updates INSTALL.DPDK.md to provide guidelines on how to enable support for multiple TX queues using QEMU command line and Libvirt config file. Signed-off-by: Oleg Strikov Acked-by: Pravin B Shelar --- INSTALL.DPDK.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index a05367a97..068d6315a 100644 --- a/INSTALL.DPDK.md +++ b/INSTALL.DPDK.md @@ -560,6 +560,24 @@ steps in the previous section before proceeding with the following steps: 5. Use virt-manager to launch the VM +Running ovs-vswitchd with DPDK backend inside a VM +-------------------------------------------------- + +Please note that additional configuration is required if you want to run +ovs-vswitchd with DPDK backend inside a QEMU virtual machine. Ovs-vswitchd +creates separate DPDK TX queues for each CPU core available. This operation +fails inside QEMU virtual machine because, by default, VirtIO NIC provided +to the guest is configured to support only single TX queue and single RX +queue. To change this behavior, you need to turn on 'mq' (multiqueue) +property of all virtio-net-pci devices emulated by QEMU and used by DPDK. 
+You may do it manually (by changing QEMU command line) or, if you use Libvirt, +by adding the following string: + +`<driver name='vhost' queues='N'/>` + +to <interface> sections of all network devices used by DPDK. Parameter 'N' +determines how many queues can be used by the guest. + Restrictions: ------------- From d23239a29f566f4c73d1d871a6cf55da90a9fa8e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 15 May 2015 06:27:35 -0700 Subject: [PATCH 011/146] datapath: backport kfree_skb_list() Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross --- acinclude.m4 | 1 + datapath/linux/compat/include/linux/skbuff.h | 4 ++++ datapath/linux/compat/skbuff-openvswitch.c | 13 +++++++++++++ 3 files changed, 18 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index aab7df8c9..d09a73fc1 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -344,6 +344,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ # quoting rules. OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [[[^@]]proto_data_valid], [OVS_DEFINE([HAVE_PROTO_DATA_VALID])]) + OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [kfree_skb_list]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [rxhash]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [u16.*rxhash], [OVS_DEFINE([HAVE_U16_RXHASH])]) diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index 0ae6c133f..1a576a00a 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -368,4 +368,8 @@ int rpl_skb_vlan_pop(struct sk_buff *skb); int rpl_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); #endif +#ifndef HAVE_KFREE_SKB_LIST +void rpl_kfree_skb_list(struct sk_buff *segs); +#define kfree_skb_list rpl_kfree_skb_list +#endif #endif diff --git a/datapath/linux/compat/skbuff-openvswitch.c b/datapath/linux/compat/skbuff-openvswitch.c index 3ecf1fe29..fad1cc7d0 100644 --- a/datapath/linux/compat/skbuff-openvswitch.c +++ b/datapath/linux/compat/skbuff-openvswitch.c @@ -267,3 +267,16 @@ int 
rpl_pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, EXPORT_SYMBOL(rpl_pskb_expand_head); #endif + +#ifndef HAVE_KFREE_SKB_LIST +void rpl_kfree_skb_list(struct sk_buff *segs) +{ + while (segs) { + struct sk_buff *next = segs->next; + + kfree_skb(segs); + segs = next; + } +} +EXPORT_SYMBOL(rpl_kfree_skb_list); +#endif From 91f831671269ade5e936812ae1dc1950105c748d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 15 May 2015 06:32:32 -0700 Subject: [PATCH 012/146] datapath: Fix Sparse warning. CHECK /home/pravin/ovs/w8/datapath/linux/flow_table.c /home/pravin/ovs/w8/datapath/linux/flow_table.c:536:6: warning: symbol 'ovs_flow_cmp_unmasked_key' was not declared. Should it be static? Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross --- datapath/flow_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 9a27bea97..409f9dde4 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -533,8 +533,8 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow, return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); From 401aa90e33befe59f47c48a21221613149bf811e Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 18 May 2015 10:24:02 -0700 Subject: [PATCH 013/146] ofproto: Fix memory leak in flow deletion. Fix a memory leak that was introduced in commit 834fe5cb997b (ofproto: Additional simplifications.). We used to unref the flow asynchronously, but forgot to do it when the support for asynchronous operations was removed. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 927521bb2..e06273276 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -4715,6 +4715,8 @@ delete_flows__(const struct rule_collection *rules, learned_cookies_dec(ofproto, rule_get_actions(rule), &dead_cookies); + + ofproto_rule_unref(rule); } learned_cookies_flush(ofproto, &dead_cookies); ofmonitor_flush(ofproto->connmgr); From 8613db65be6a4641eebc01765637701287f78efc Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:45 -0700 Subject: [PATCH 014/146] dp-packet: Remove 'list' member. The 'list' member is only used (two users) in the slow path. This commit removes it to reduce the struct size Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dp-packet.c | 13 ------------ lib/dp-packet.h | 8 ------- lib/netdev-dummy.c | 48 +++++++++++++++++++++++++++++++++++------- ofproto/ofproto-dpif.c | 30 ++++++++++++-------------- 4 files changed, 54 insertions(+), 45 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 8a4cf43ea..b2d9d5c69 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -31,7 +31,6 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so b->l2_pad_size = 0; b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; b->md = PKT_METADATA_INITIALIZER(0); - list_poison(&b->list_node); } static void @@ -460,18 +459,6 @@ dp_packet_to_string(const struct dp_packet *b, size_t maxbytes) return ds_cstr(&s); } -/* Removes each of the "struct dp_packet"s on 'list' from the list and frees - * them. 
*/ -void -dp_packet_list_delete(struct ovs_list *list) -{ - struct dp_packet *b; - - LIST_FOR_EACH_POP (b, list_node, list) { - dp_packet_delete(b); - } -} - static inline void dp_packet_adjust_layer_offset(uint16_t *offset, int increment) { diff --git a/lib/dp-packet.h b/lib/dp-packet.h index fd23d1153..29a883bc3 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -78,7 +78,6 @@ struct dp_packet { uint16_t l4_ofs; /* Transport-level header offset from 'frame', or UINT16_MAX. */ struct pkt_metadata md; - struct ovs_list list_node; /* Private list element for use by owner. */ }; static inline void * dp_packet_data(const struct dp_packet *); @@ -159,8 +158,6 @@ static inline void *dp_packet_try_pull(struct dp_packet *, size_t); void *dp_packet_steal_data(struct dp_packet *); char *dp_packet_to_string(const struct dp_packet *, size_t maxbytes); -static inline struct dp_packet *dp_packet_from_list(const struct ovs_list *); -void dp_packet_list_delete(struct ovs_list *); static inline bool dp_packet_equal(const struct dp_packet *, const struct dp_packet *); @@ -262,11 +259,6 @@ static inline void *dp_packet_try_pull(struct dp_packet *b, size_t size) ? dp_packet_pull(b, size) : NULL; } -static inline struct dp_packet *dp_packet_from_list(const struct ovs_list *list) -{ - return CONTAINER_OF(list, struct dp_packet, list_node); -} - static inline bool dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b) { return dp_packet_size(a) == dp_packet_size(b) && diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 64f8f66b9..33d0876d5 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -84,6 +84,11 @@ struct dummy_packet_conn { } u; }; +struct pkt_list_node { + struct dp_packet *pkt; + struct ovs_list list_node; +}; + /* Protects 'dummy_list'. 
*/ static struct ovs_mutex dummy_list_mutex = OVS_MUTEX_INITIALIZER; @@ -131,6 +136,8 @@ static void netdev_dummy_queue_packet(struct netdev_dummy *, struct dp_packet *) static void dummy_packet_stream_close(struct dummy_packet_stream *); +static void pkt_list_delete(struct ovs_list *); + static bool is_dummy_class(const struct netdev_class *class) { @@ -186,10 +193,14 @@ dummy_packet_stream_send(struct dummy_packet_stream *s, const void *buffer, size { if (list_size(&s->txq) < NETDEV_DUMMY_MAX_QUEUE) { struct dp_packet *b; + struct pkt_list_node *node; b = dp_packet_clone_data_with_headroom(buffer, size, 2); put_unaligned_be16(dp_packet_push_uninit(b, 2), htons(size)); - list_push_back(&s->txq, &b->list_node); + + node = xmalloc(sizeof *node); + node->pkt = b; + list_push_back(&s->txq, &node->list_node); } } @@ -202,16 +213,19 @@ dummy_packet_stream_run(struct netdev_dummy *dev, struct dummy_packet_stream *s) stream_run(s->stream); if (!list_is_empty(&s->txq)) { + struct pkt_list_node *txbuf_node; struct dp_packet *txbuf; int retval; - txbuf = dp_packet_from_list(list_front(&s->txq)); + ASSIGN_CONTAINER(txbuf_node, list_front(&s->txq), list_node); + txbuf = txbuf_node->pkt; retval = stream_send(s->stream, dp_packet_data(txbuf), dp_packet_size(txbuf)); if (retval > 0) { dp_packet_pull(txbuf, retval); if (!dp_packet_size(txbuf)) { - list_remove(&txbuf->list_node); + list_remove(&txbuf_node->list_node); + free(txbuf_node); dp_packet_delete(txbuf); } } else if (retval != -EAGAIN) { @@ -263,7 +277,7 @@ dummy_packet_stream_close(struct dummy_packet_stream *s) { stream_close(s->stream); dp_packet_uninit(&s->rxbuf); - dp_packet_list_delete(&s->txq); + pkt_list_delete(&s->txq); } static void @@ -797,7 +811,7 @@ netdev_dummy_rxq_destruct(struct netdev_rxq *rxq_) ovs_mutex_lock(&netdev->mutex); list_remove(&rx->node); - dp_packet_list_delete(&rx->recv_queue); + pkt_list_delete(&rx->recv_queue); ovs_mutex_unlock(&netdev->mutex); seq_destroy(rx->seq); } @@ -820,7 +834,11 @@ 
netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr, ovs_mutex_lock(&netdev->mutex); if (!list_is_empty(&rx->recv_queue)) { - packet = dp_packet_from_list(list_pop_front(&rx->recv_queue)); + struct pkt_list_node *pkt_node; + + ASSIGN_CONTAINER(pkt_node, list_pop_front(&rx->recv_queue), list_node); + packet = pkt_node->pkt; + free(pkt_node); rx->recv_queue_len--; } else { packet = NULL; @@ -866,7 +884,7 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_) struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev); ovs_mutex_lock(&netdev->mutex); - dp_packet_list_delete(&rx->recv_queue); + pkt_list_delete(&rx->recv_queue); rx->recv_queue_len = 0; ovs_mutex_unlock(&netdev->mutex); @@ -1116,6 +1134,17 @@ static const struct netdev_class dummy_class = { netdev_dummy_rxq_drain, }; +static void +pkt_list_delete(struct ovs_list *l) +{ + struct pkt_list_node *pkt; + + LIST_FOR_EACH_POP(pkt, list_node, l) { + dp_packet_delete(pkt->pkt); + free(pkt); + } +} + static struct dp_packet * eth_from_packet_or_flow(const char *s) { @@ -1159,7 +1188,10 @@ eth_from_packet_or_flow(const char *s) static void netdev_dummy_queue_packet__(struct netdev_rxq_dummy *rx, struct dp_packet *packet) { - list_push_back(&rx->recv_queue, &packet->list_node); + struct pkt_list_node *pkt_node = xmalloc(sizeof *pkt_node); + + pkt_node->pkt = packet; + list_push_back(&rx->recv_queue, &pkt_node->list_node); rx->recv_queue_len++; seq_change(rx->seq); } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index bf893214b..d151bb7d5 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2974,43 +2974,41 @@ static void bundle_send_learning_packets(struct ofbundle *bundle) { struct ofproto_dpif *ofproto = bundle->ofproto; - struct dp_packet *learning_packet; int error, n_packets, n_errors; struct mac_entry *e; + struct pkt_list { + struct ovs_list list_node; + struct ofport_dpif *port; + struct dp_packet *pkt; + } *pkt_node; struct ovs_list packets; 
list_init(&packets); ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { if (mac_entry_get_port(ofproto->ml, e) != bundle) { - void *port_void; - - learning_packet = bond_compose_learning_packet(bundle->bond, - e->mac, e->vlan, - &port_void); - /* Temporarily use 'frame' as a private pointer (see below). */ - ovs_assert(learning_packet->frame == dp_packet_data(learning_packet)); - learning_packet->frame = port_void; - list_push_back(&packets, &learning_packet->list_node); + pkt_node = xmalloc(sizeof *pkt_node); + pkt_node->pkt = bond_compose_learning_packet(bundle->bond, + e->mac, e->vlan, + (void **)&pkt_node->port); + list_push_back(&packets, &pkt_node->list_node); } } ovs_rwlock_unlock(&ofproto->ml->rwlock); error = n_packets = n_errors = 0; - LIST_FOR_EACH (learning_packet, list_node, &packets) { + LIST_FOR_EACH_POP (pkt_node, list_node, &packets) { int ret; - void *port_void = learning_packet->frame; - /* Restore 'frame'. */ - learning_packet->frame = dp_packet_data(learning_packet); - ret = ofproto_dpif_send_packet(port_void, learning_packet); + ret = ofproto_dpif_send_packet(pkt_node->port, pkt_node->pkt); + dp_packet_delete(pkt_node->pkt); + free(pkt_node); if (ret) { error = ret; n_errors++; } n_packets++; } - dp_packet_list_delete(&packets); if (n_errors) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); From 82eb5b0abaf0125898a89e032f27fdd08e03169b Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:46 -0700 Subject: [PATCH 015/146] dp-packet: Remove 'frame' member. In 'struct ofpbuf' the 'frame' pointer was used to parse different kinds of data (Ethernet, OpenFlow, Netlink attributes). For Ethernet packets the 'frame' pointer was supposed to have the same value as the 'data' pointer. Since 'struct dp_packet' is only used for Ethernet packets, there's no need for a separate 'frame' pointer: we can use the 'data' pointer instead. 
Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dp-packet.c | 17 ++-------- lib/dp-packet.h | 71 ++++++++++++++++----------------------- lib/flow.c | 2 +- lib/packets.c | 6 ++-- lib/rstp-state-machines.c | 2 +- lib/stp.c | 2 +- 6 files changed, 37 insertions(+), 63 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index b2d9d5c69..375b7b715 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -27,7 +27,6 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so { b->allocated = allocated; b->source = source; - b->frame = NULL; b->l2_pad_size = 0; b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; b->md = PKT_METADATA_INITIALIZER(0); @@ -164,12 +163,6 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom) new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer), dp_packet_size(buffer), headroom); - if (buffer->frame) { - uintptr_t data_delta - = (char *)dp_packet_data(new_buffer) - (char *)dp_packet_data(buffer); - - new_buffer->frame = (char *) buffer->frame + data_delta; - } new_buffer->l2_pad_size = buffer->l2_pad_size; new_buffer->l2_5_ofs = buffer->l2_5_ofs; new_buffer->l3_ofs = buffer->l3_ofs; @@ -255,11 +248,6 @@ dp_packet_resize__(struct dp_packet *b, size_t new_headroom, size_t new_tailroom new_data = (char *) new_base + new_headroom; if (dp_packet_data(b) != new_data) { - if (b->frame) { - uintptr_t data_delta = (char *) new_data - (char *) dp_packet_data(b); - - b->frame = (char *) b->frame + data_delta; - } dp_packet_set_data(b, new_data); } } @@ -479,12 +467,11 @@ dp_packet_resize_l2_5(struct dp_packet *b, int increment) dp_packet_pull(b, -increment); } - b->frame = dp_packet_data(b); /* Adjust layer offsets after l2_5. 
*/ dp_packet_adjust_layer_offset(&b->l3_ofs, increment); dp_packet_adjust_layer_offset(&b->l4_ofs, increment); - return b->frame; + return dp_packet_data(b); } /* Adjust the size of the l2 portion of the dp_packet, updating the l2 @@ -495,5 +482,5 @@ dp_packet_resize_l2(struct dp_packet *b, int increment) { dp_packet_resize_l2_5(b, increment); dp_packet_adjust_layer_offset(&b->l2_5_ofs, increment); - return b->frame; + return dp_packet_data(b); } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 29a883bc3..54a34453a 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -36,25 +36,8 @@ enum OVS_PACKED_ENUM dp_packet_source { ref to build_dp_packet() in netdev-dpdk. */ }; -/* Buffer for holding arbitrary data. An dp_packet is automatically reallocated +/* Buffer for holding packet data. A dp_packet is automatically reallocated * as necessary if it grows too large for the available memory. - * - * 'frame' and offset conventions: - * - * Network frames (aka "packets"): 'frame' MUST be set to the start of the - * packet, layer offsets MAY be set as appropriate for the packet. - * Additionally, we assume in many places that the 'frame' and 'data' are - * the same for packets. - * - * OpenFlow messages: 'frame' points to the start of the OpenFlow - * header, while 'l3_ofs' is the length of the OpenFlow header. - * When parsing, the 'data' will move past these, as data is being - * pulled from the OpenFlow message. - * - * Actions: When encoding OVS action lists, the 'frame' is used - * as a pointer to the beginning of the current action (see ofpact_put()). - * - * rconn: Reuses 'frame' as a private pointer while queuing. */ struct dp_packet { #ifdef DPDK_NETDEV @@ -67,16 +50,14 @@ struct dp_packet { #endif uint32_t allocated; /* Number of bytes allocated. */ - void *frame; /* Packet frame start, or NULL. */ enum dp_packet_source source; /* Source of memory allocated as 'base'. */ - uint8_t l2_pad_size; /* Detected l2 padding size. - * Padding is non-pullable. 
*/ - uint16_t l2_5_ofs; /* MPLS label stack offset from 'frame', or - * UINT16_MAX */ - uint16_t l3_ofs; /* Network-level header offset from 'frame', - or UINT16_MAX. */ - uint16_t l4_ofs; /* Transport-level header offset from 'frame', - or UINT16_MAX. */ + uint8_t l2_pad_size; /* Detected l2 padding size. + * Padding is non-pullable. */ + uint16_t l2_5_ofs; /* MPLS label stack offset, or UINT16_MAX */ + uint16_t l3_ofs; /* Network-level header offset, + * or UINT16_MAX. */ + uint16_t l4_ofs; /* Transport-level header offset, + or UINT16_MAX. */ struct pkt_metadata md; }; @@ -91,7 +72,7 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t); void * dp_packet_resize_l2(struct dp_packet *, int increment); void * dp_packet_resize_l2_5(struct dp_packet *, int increment); static inline void * dp_packet_l2(const struct dp_packet *); -static inline void dp_packet_set_frame(struct dp_packet *, void *); +static inline void dp_packet_reset_offsets(struct dp_packet *); static inline uint8_t dp_packet_l2_pad_size(const struct dp_packet *); static inline void dp_packet_set_l2_pad_size(struct dp_packet *, uint8_t); static inline void * dp_packet_l2_5(const struct dp_packet *); @@ -265,18 +246,17 @@ static inline bool dp_packet_equal(const struct dp_packet *a, const struct dp_pa memcmp(dp_packet_data(a), dp_packet_data(b), dp_packet_size(a)) == 0; } -/* Get the start if the Ethernet frame. 'l3_ofs' marks the end of the l2 +/* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2 * headers, so return NULL if it is not set. */ static inline void * dp_packet_l2(const struct dp_packet *b) { - return (b->l3_ofs != UINT16_MAX) ? b->frame : NULL; + return (b->l3_ofs != UINT16_MAX) ? dp_packet_data(b) : NULL; } -/* Sets the packet frame start pointer and resets all layer offsets. - * l3 offset must be set before 'l2' can be retrieved. */ -static inline void dp_packet_set_frame(struct dp_packet *b, void *packet) +/* Resets all layer offsets. 
'l3' offset must be set before 'l2' can be + * retrieved. */ +static inline void dp_packet_reset_offsets(struct dp_packet *b) { - b->frame = packet; b->l2_pad_size = 0; b->l2_5_ofs = UINT16_MAX; b->l3_ofs = UINT16_MAX; @@ -296,32 +276,40 @@ static inline void dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_si static inline void * dp_packet_l2_5(const struct dp_packet *b) { - return b->l2_5_ofs != UINT16_MAX ? (char *)b->frame + b->l2_5_ofs : NULL; + return b->l2_5_ofs != UINT16_MAX + ? (char *) dp_packet_data(b) + b->l2_5_ofs + : NULL; } static inline void dp_packet_set_l2_5(struct dp_packet *b, void *l2_5) { - b->l2_5_ofs = l2_5 ? (char *)l2_5 - (char *)b->frame : UINT16_MAX; + b->l2_5_ofs = l2_5 + ? (char *) l2_5 - (char *) dp_packet_data(b) + : UINT16_MAX; } static inline void * dp_packet_l3(const struct dp_packet *b) { - return b->l3_ofs != UINT16_MAX ? (char *)b->frame + b->l3_ofs : NULL; + return b->l3_ofs != UINT16_MAX + ? (char *) dp_packet_data(b) + b->l3_ofs + : NULL; } static inline void dp_packet_set_l3(struct dp_packet *b, void *l3) { - b->l3_ofs = l3 ? (char *)l3 - (char *)b->frame : UINT16_MAX; + b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) : UINT16_MAX; } static inline void * dp_packet_l4(const struct dp_packet *b) { - return b->l4_ofs != UINT16_MAX ? (char *)b->frame + b->l4_ofs : NULL; + return b->l4_ofs != UINT16_MAX + ? (char *) dp_packet_data(b) + b->l4_ofs + : NULL; } static inline void dp_packet_set_l4(struct dp_packet *b, void *l4) { - b->l4_ofs = l4 ? (char *)l4 - (char *)b->frame : UINT16_MAX; + b->l4_ofs = l4 ? 
(char *) l4 - (char *) dp_packet_data(b) : UINT16_MAX; } static inline size_t dp_packet_l4_size(const struct dp_packet *b) @@ -471,8 +459,7 @@ static inline void dp_packet_set_data(struct dp_packet *b, void *data) static inline void dp_packet_reset_packet(struct dp_packet *b, int off) { dp_packet_set_size(b, dp_packet_size(b) - off); - dp_packet_set_data(b, (void *) ((unsigned char *) b->frame + off)); - b->frame = NULL; + dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off)); b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; } diff --git a/lib/flow.c b/lib/flow.c index e54280a45..0f9ee504c 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -449,7 +449,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) /* Initialize packet's layer pointer and offsets. */ l2 = data; - dp_packet_set_frame(packet, data); + dp_packet_reset_offsets(packet); /* Must have full Ethernet header to proceed. */ if (OVS_UNLIKELY(size < sizeof(struct eth_header))) { diff --git a/lib/packets.c b/lib/packets.c index 419c6af49..016b12bd4 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -170,7 +170,7 @@ compose_rarp(struct dp_packet *b, const uint8_t eth_src[ETH_ADDR_LEN]) memcpy(arp->ar_tha, eth_src, ETH_ADDR_LEN); put_16aligned_be32(&arp->ar_tpa, htonl(0)); - dp_packet_set_frame(b, eth); + dp_packet_reset_offsets(b); dp_packet_set_l3(b, arp); } @@ -579,7 +579,7 @@ eth_compose(struct dp_packet *b, const uint8_t eth_dst[ETH_ADDR_LEN], memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(eth_type); - dp_packet_set_frame(b, eth); + dp_packet_reset_offsets(b); dp_packet_set_l3(b, data); return data; @@ -1040,7 +1040,7 @@ compose_arp(struct dp_packet *b, const uint8_t eth_src[ETH_ADDR_LEN], put_16aligned_be32(&arp->ar_spa, ip_src); put_16aligned_be32(&arp->ar_tpa, ip_dst); - dp_packet_set_frame(b, eth); + dp_packet_reset_offsets(b); dp_packet_set_l3(b, arp); } diff --git a/lib/rstp-state-machines.c b/lib/rstp-state-machines.c index 7e2378977..f55221f3f 100644 --- 
a/lib/rstp-state-machines.c +++ b/lib/rstp-state-machines.c @@ -696,7 +696,7 @@ rstp_send_bpdu(struct rstp_port *p, const void *bpdu, size_t bpdu_size) pkt = dp_packet_new(ETH_HEADER_LEN + LLC_HEADER_LEN + bpdu_size); eth = dp_packet_put_zeros(pkt, sizeof *eth); llc = dp_packet_put_zeros(pkt, sizeof *llc); - dp_packet_set_frame(pkt, eth); + dp_packet_reset_offsets(pkt); dp_packet_set_l3(pkt, dp_packet_put(pkt, bpdu, bpdu_size)); /* 802.2 header. */ diff --git a/lib/stp.c b/lib/stp.c index ec8b01a4e..22bd93a60 100644 --- a/lib/stp.c +++ b/lib/stp.c @@ -1576,7 +1576,7 @@ stp_send_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) pkt = dp_packet_new(ETH_HEADER_LEN + LLC_HEADER_LEN + bpdu_size); eth = dp_packet_put_zeros(pkt, sizeof *eth); llc = dp_packet_put_zeros(pkt, sizeof *llc); - dp_packet_set_frame(pkt, eth); + dp_packet_reset_offsets(pkt); dp_packet_set_l3(pkt, dp_packet_put(pkt, bpdu, bpdu_size)); /* 802.2 header. */ From 11a6fbd553ec46b890459e82e5387d28a2664695 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:47 -0700 Subject: [PATCH 016/146] dp-packet: Merge 'allocated' member with DPDK mbuf 'buf_len'. DPDK buf_len is only 16-bit wide ('allocated' was 32-bit), but it should be enough to store the number of allocated bytes. This will reduce 'struct dp_packet' size. 
Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dp-packet.c | 6 +++--- lib/dp-packet.h | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 375b7b715..31fe9d330 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -25,7 +25,7 @@ static void dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source source) { - b->allocated = allocated; + dp_packet_set_allocated(b, allocated); b->source = source; b->l2_pad_size = 0; b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; @@ -243,7 +243,7 @@ dp_packet_resize__(struct dp_packet *b, size_t new_headroom, size_t new_tailroom OVS_NOT_REACHED(); } - b->allocated = new_allocated; + dp_packet_set_allocated(b, new_allocated); dp_packet_set_base(b, new_base); new_data = (char *) new_base + new_headroom; @@ -441,7 +441,7 @@ dp_packet_to_string(const struct dp_packet *b, size_t maxbytes) ds_init(&s); ds_put_format(&s, "size=%"PRIu32", allocated=%"PRIu32", head=%"PRIuSIZE", tail=%"PRIuSIZE"\n", - dp_packet_size(b), b->allocated, + dp_packet_size(b), dp_packet_get_allocated(b), dp_packet_headroom(b), dp_packet_tailroom(b)); ds_put_hex_dump(&s, dp_packet_data(b), MIN(dp_packet_size(b), maxbytes), 0, false); return ds_cstr(&s); diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 54a34453a..1d10d9985 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -44,12 +44,11 @@ struct dp_packet { struct rte_mbuf mbuf; /* DPDK mbuf */ #else void *base_; /* First byte of allocated space. */ + uint16_t allocated_; /* Number of bytes allocated. */ uint16_t data_ofs; /* First byte actually in use. */ uint32_t size_; /* Number of bytes in use. */ uint32_t rss_hash; /* Packet hash. */ #endif - uint32_t allocated; /* Number of bytes allocated. */ - enum dp_packet_source source; /* Source of memory allocated as 'base'. */ uint8_t l2_pad_size; /* Detected l2 padding size. * Padding is non-pullable. 
*/ @@ -69,6 +68,9 @@ static inline void dp_packet_set_base(struct dp_packet *, void *); static inline uint32_t dp_packet_size(const struct dp_packet *); static inline void dp_packet_set_size(struct dp_packet *, uint32_t); +static inline uint16_t dp_packet_get_allocated(const struct dp_packet *); +static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t); + void * dp_packet_resize_l2(struct dp_packet *, int increment); void * dp_packet_resize_l2_5(struct dp_packet *, int increment); static inline void * dp_packet_l2(const struct dp_packet *); @@ -194,7 +196,7 @@ static inline void *dp_packet_tail(const struct dp_packet *b) * not necessarily in use) in 'b'. */ static inline void *dp_packet_end(const struct dp_packet *b) { - return (char *) dp_packet_base(b) + b->allocated; + return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); } /* Returns the number of bytes of headroom in 'b', that is, the number of bytes @@ -408,6 +410,15 @@ static inline void __packet_set_data(struct dp_packet *b, uint16_t v) b->mbuf.data_off = v; } +static inline uint16_t dp_packet_get_allocated(const struct dp_packet *b) +{ + return b->mbuf.buf_len; +} + +static inline void dp_packet_set_allocated(struct dp_packet *b, uint16_t s) +{ + b->mbuf.buf_len = s; +} #else static inline void * dp_packet_base(const struct dp_packet *b) { @@ -439,6 +450,15 @@ static inline void __packet_set_data(struct dp_packet *b, uint16_t v) b->data_ofs = v; } +static inline uint16_t dp_packet_get_allocated(const struct dp_packet *b) +{ + return b->allocated_; +} + +static inline void dp_packet_set_allocated(struct dp_packet *b, uint16_t s) +{ + b->allocated_ = s; +} #endif static inline void * dp_packet_data(const struct dp_packet *b) From 5a07c6e177af50a38bf37600c315f6d4a6b47b91 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:48 -0700 Subject: [PATCH 017/146] dp-packet: Style fixes. 
Also, removes an unused function Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dp-packet.h | 230 +++++++++++++++++++++++++++--------------------- 1 file changed, 131 insertions(+), 99 deletions(-) diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 1d10d9985..e4c25936b 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -33,7 +33,7 @@ enum OVS_PACKED_ENUM dp_packet_source { DPBUF_STACK, /* Un-movable stack space or static buffer. */ DPBUF_STUB, /* Starts on stack, may expand into heap. */ DPBUF_DPDK, /* buffer data is from DPDK allocated memory. - ref to build_dp_packet() in netdev-dpdk. */ + * ref to build_dp_packet() in netdev-dpdk. */ }; /* Buffer for holding packet data. A dp_packet is automatically reallocated @@ -60,9 +60,9 @@ struct dp_packet { struct pkt_metadata md; }; -static inline void * dp_packet_data(const struct dp_packet *); +static inline void *dp_packet_data(const struct dp_packet *); static inline void dp_packet_set_data(struct dp_packet *, void *); -static inline void * dp_packet_base(const struct dp_packet *); +static inline void *dp_packet_base(const struct dp_packet *); static inline void dp_packet_set_base(struct dp_packet *, void *); static inline uint32_t dp_packet_size(const struct dp_packet *); @@ -71,17 +71,17 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t); static inline uint16_t dp_packet_get_allocated(const struct dp_packet *); static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t); -void * dp_packet_resize_l2(struct dp_packet *, int increment); -void * dp_packet_resize_l2_5(struct dp_packet *, int increment); -static inline void * dp_packet_l2(const struct dp_packet *); +void *dp_packet_resize_l2(struct dp_packet *, int increment); +void *dp_packet_resize_l2_5(struct dp_packet *, int increment); +static inline void *dp_packet_l2(const struct dp_packet *); static inline void dp_packet_reset_offsets(struct dp_packet *); static inline uint8_t dp_packet_l2_pad_size(const 
struct dp_packet *); static inline void dp_packet_set_l2_pad_size(struct dp_packet *, uint8_t); -static inline void * dp_packet_l2_5(const struct dp_packet *); +static inline void *dp_packet_l2_5(const struct dp_packet *); static inline void dp_packet_set_l2_5(struct dp_packet *, void *); -static inline void * dp_packet_l3(const struct dp_packet *); +static inline void *dp_packet_l3(const struct dp_packet *); static inline void dp_packet_set_l3(struct dp_packet *, void *); -static inline void * dp_packet_l4(const struct dp_packet *); +static inline void *dp_packet_l4(const struct dp_packet *); static inline void dp_packet_set_l4(struct dp_packet *, void *); static inline size_t dp_packet_l4_size(const struct dp_packet *); static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *); @@ -94,26 +94,25 @@ void dp_packet_use(struct dp_packet *, void *, size_t); void dp_packet_use_stub(struct dp_packet *, void *, size_t); void dp_packet_use_const(struct dp_packet *, const void *, size_t); -void dp_packet_init_dpdk(struct dp_packet *b, size_t allocated); +void dp_packet_init_dpdk(struct dp_packet *, size_t allocated); void dp_packet_init(struct dp_packet *, size_t); void dp_packet_uninit(struct dp_packet *); -static inline void *dp_packet_get_uninit_pointer(struct dp_packet *); struct dp_packet *dp_packet_new(size_t); struct dp_packet *dp_packet_new_with_headroom(size_t, size_t headroom); struct dp_packet *dp_packet_clone(const struct dp_packet *); struct dp_packet *dp_packet_clone_with_headroom(const struct dp_packet *, - size_t headroom); + size_t headroom); struct dp_packet *dp_packet_clone_data(const void *, size_t); struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, - size_t headroom); + size_t headroom); static inline void dp_packet_delete(struct dp_packet *); static inline void *dp_packet_at(const struct dp_packet *, size_t offset, - size_t size); -static inline void *dp_packet_at_assert(const struct dp_packet *, size_t 
offset, - size_t size); + size_t size); +static inline void *dp_packet_at_assert(const struct dp_packet *, + size_t offset, size_t size); static inline void *dp_packet_tail(const struct dp_packet *); static inline void *dp_packet_end(const struct dp_packet *); @@ -122,11 +121,11 @@ void *dp_packet_put_zeros(struct dp_packet *, size_t); void *dp_packet_put(struct dp_packet *, const void *, size_t); char *dp_packet_put_hex(struct dp_packet *, const char *s, size_t *n); void dp_packet_reserve(struct dp_packet *, size_t); -void dp_packet_reserve_with_tailroom(struct dp_packet *b, size_t headroom, - size_t tailroom); -void *dp_packet_push_uninit(struct dp_packet *b, size_t); +void dp_packet_reserve_with_tailroom(struct dp_packet *, size_t headroom, + size_t tailroom); +void *dp_packet_push_uninit(struct dp_packet *, size_t); void *dp_packet_push_zeros(struct dp_packet *, size_t); -void *dp_packet_push(struct dp_packet *b, const void *, size_t); +void *dp_packet_push(struct dp_packet *, const void *, size_t); static inline size_t dp_packet_headroom(const struct dp_packet *); static inline size_t dp_packet_tailroom(const struct dp_packet *); @@ -141,20 +140,13 @@ static inline void *dp_packet_try_pull(struct dp_packet *, size_t); void *dp_packet_steal_data(struct dp_packet *); char *dp_packet_to_string(const struct dp_packet *, size_t maxbytes); -static inline bool dp_packet_equal(const struct dp_packet *, const struct dp_packet *); +static inline bool dp_packet_equal(const struct dp_packet *, + const struct dp_packet *); -/* Returns a pointer that may be passed to free() to accomplish the same thing - * as dp_packet_uninit(b). The return value is a null pointer if dp_packet_uninit() - * would not free any memory. */ -static inline void *dp_packet_get_uninit_pointer(struct dp_packet *b) -{ - /* XXX: If 'source' is DPBUF_DPDK memory gets leaked! */ - return b && b->source == DPBUF_MALLOC ? 
dp_packet_base(b) : NULL; -} - /* Frees memory that 'b' points to, as well as 'b' itself. */ -static inline void dp_packet_delete(struct dp_packet *b) +static inline void +dp_packet_delete(struct dp_packet *b) { if (b) { if (b->source == DPBUF_DPDK) { @@ -171,52 +163,59 @@ static inline void dp_packet_delete(struct dp_packet *b) /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to * byte 'offset'. Otherwise, returns a null pointer. */ -static inline void *dp_packet_at(const struct dp_packet *b, size_t offset, - size_t size) +static inline void * +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size) { - return offset + size <= dp_packet_size(b) ? (char *) dp_packet_data(b) + offset : NULL; + return offset + size <= dp_packet_size(b) + ? (char *) dp_packet_data(b) + offset + : NULL; } /* Returns a pointer to byte 'offset' in 'b', which must contain at least * 'offset + size' bytes of data. */ -static inline void *dp_packet_at_assert(const struct dp_packet *b, size_t offset, - size_t size) +static inline void * +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size) { ovs_assert(offset + size <= dp_packet_size(b)); return ((char *) dp_packet_data(b)) + offset; } /* Returns a pointer to byte following the last byte of data in use in 'b'. */ -static inline void *dp_packet_tail(const struct dp_packet *b) +static inline void * +dp_packet_tail(const struct dp_packet *b) { return (char *) dp_packet_data(b) + dp_packet_size(b); } /* Returns a pointer to byte following the last byte allocated for use (but * not necessarily in use) in 'b'. */ -static inline void *dp_packet_end(const struct dp_packet *b) +static inline void * +dp_packet_end(const struct dp_packet *b) { return (char *) dp_packet_base(b) + dp_packet_get_allocated(b); } /* Returns the number of bytes of headroom in 'b', that is, the number of bytes * of unused space in dp_packet 'b' before the data that is in use. 
(Most - * commonly, the data in a dp_packet is at its beginning, and thus the dp_packet's - * headroom is 0.) */ -static inline size_t dp_packet_headroom(const struct dp_packet *b) + * commonly, the data in a dp_packet is at its beginning, and thus the + * dp_packet's headroom is 0.) */ +static inline size_t +dp_packet_headroom(const struct dp_packet *b) { - return (char*)dp_packet_data(b) - (char*)dp_packet_base(b); + return (char *) dp_packet_data(b) - (char *) dp_packet_base(b); } -/* Returns the number of bytes that may be appended to the tail end of dp_packet - * 'b' before the dp_packet must be reallocated. */ -static inline size_t dp_packet_tailroom(const struct dp_packet *b) +/* Returns the number of bytes that may be appended to the tail end of + * dp_packet 'b' before the dp_packet must be reallocated. */ +static inline size_t +dp_packet_tailroom(const struct dp_packet *b) { - return (char*)dp_packet_end(b) - (char*)dp_packet_tail(b); + return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b); } /* Clears any data from 'b'. */ -static inline void dp_packet_clear(struct dp_packet *b) +static inline void +dp_packet_clear(struct dp_packet *b) { dp_packet_set_data(b, dp_packet_base(b)); dp_packet_set_size(b, 0); @@ -224,11 +223,12 @@ static inline void dp_packet_clear(struct dp_packet *b) /* Removes 'size' bytes from the head end of 'b', which must contain at least * 'size' bytes of data. Returns the first byte of data removed. 
*/ -static inline void *dp_packet_pull(struct dp_packet *b, size_t size) +static inline void * +dp_packet_pull(struct dp_packet *b, size_t size) { void *data = dp_packet_data(b); ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size); - dp_packet_set_data(b, (char*)dp_packet_data(b) + size); + dp_packet_set_data(b, (char *) dp_packet_data(b) + size); dp_packet_set_size(b, dp_packet_size(b) - size); return data; } @@ -236,28 +236,32 @@ static inline void *dp_packet_pull(struct dp_packet *b, size_t size) /* If 'b' has at least 'size' bytes of data, removes that many bytes from the * head end of 'b' and returns the first byte removed. Otherwise, returns a * null pointer without modifying 'b'. */ -static inline void *dp_packet_try_pull(struct dp_packet *b, size_t size) +static inline void * +dp_packet_try_pull(struct dp_packet *b, size_t size) { return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size ? dp_packet_pull(b, size) : NULL; } -static inline bool dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b) +static inline bool +dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b) { return dp_packet_size(a) == dp_packet_size(b) && - memcmp(dp_packet_data(a), dp_packet_data(b), dp_packet_size(a)) == 0; + !memcmp(dp_packet_data(a), dp_packet_data(b), dp_packet_size(a)); } /* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2 * headers, so return NULL if it is not set. */ -static inline void * dp_packet_l2(const struct dp_packet *b) +static inline void * +dp_packet_l2(const struct dp_packet *b) { return (b->l3_ofs != UINT16_MAX) ? dp_packet_data(b) : NULL; } /* Resets all layer offsets. 'l3' offset must be set before 'l2' can be * retrieved. 
*/ -static inline void dp_packet_reset_offsets(struct dp_packet *b) +static inline void +dp_packet_reset_offsets(struct dp_packet *b) { b->l2_pad_size = 0; b->l2_5_ofs = UINT16_MAX; @@ -265,56 +269,65 @@ static inline void dp_packet_reset_offsets(struct dp_packet *b) b->l4_ofs = UINT16_MAX; } -static inline uint8_t dp_packet_l2_pad_size(const struct dp_packet *b) +static inline uint8_t +dp_packet_l2_pad_size(const struct dp_packet *b) { return b->l2_pad_size; } -static inline void dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_size) +static inline void +dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_size) { ovs_assert(pad_size <= dp_packet_size(b)); b->l2_pad_size = pad_size; } -static inline void * dp_packet_l2_5(const struct dp_packet *b) +static inline void * +dp_packet_l2_5(const struct dp_packet *b) { return b->l2_5_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l2_5_ofs : NULL; } -static inline void dp_packet_set_l2_5(struct dp_packet *b, void *l2_5) +static inline void +dp_packet_set_l2_5(struct dp_packet *b, void *l2_5) { b->l2_5_ofs = l2_5 ? (char *) l2_5 - (char *) dp_packet_data(b) : UINT16_MAX; } -static inline void * dp_packet_l3(const struct dp_packet *b) +static inline void * +dp_packet_l3(const struct dp_packet *b) { return b->l3_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l3_ofs : NULL; } -static inline void dp_packet_set_l3(struct dp_packet *b, void *l3) +static inline void +dp_packet_set_l3(struct dp_packet *b, void *l3) { b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) : UINT16_MAX; } -static inline void * dp_packet_l4(const struct dp_packet *b) +static inline void * +dp_packet_l4(const struct dp_packet *b) { return b->l4_ofs != UINT16_MAX ? (char *) dp_packet_data(b) + b->l4_ofs : NULL; } -static inline void dp_packet_set_l4(struct dp_packet *b, void *l4) +static inline void +dp_packet_set_l4(struct dp_packet *b, void *l4) { b->l4_ofs = l4 ? 
(char *) l4 - (char *) dp_packet_data(b) : UINT16_MAX; } -static inline size_t dp_packet_l4_size(const struct dp_packet *b) +static inline size_t +dp_packet_l4_size(const struct dp_packet *b) { return b->l4_ofs != UINT16_MAX ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l4(b) @@ -322,7 +335,8 @@ static inline size_t dp_packet_l4_size(const struct dp_packet *b) : 0; } -static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *b) +static inline const void * +dp_packet_get_tcp_payload(const struct dp_packet *b) { size_t l4_size = dp_packet_l4_size(b); @@ -337,25 +351,29 @@ static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *b) return NULL; } -static inline const void *dp_packet_get_udp_payload(const struct dp_packet *b) +static inline const void * +dp_packet_get_udp_payload(const struct dp_packet *b) { return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN) ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL; } -static inline const void *dp_packet_get_sctp_payload(const struct dp_packet *b) +static inline const void * +dp_packet_get_sctp_payload(const struct dp_packet *b) { return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN) ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL; } -static inline const void *dp_packet_get_icmp_payload(const struct dp_packet *b) +static inline const void * +dp_packet_get_icmp_payload(const struct dp_packet *b) { return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN) ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL; } -static inline const void *dp_packet_get_nd_payload(const struct dp_packet *b) +static inline const void * +dp_packet_get_nd_payload(const struct dp_packet *b) { return OVS_LIKELY(dp_packet_l4_size(b) >= ND_MSG_LEN) ? 
(const char *)dp_packet_l4(b) + ND_MSG_LEN : NULL; @@ -364,22 +382,26 @@ static inline const void *dp_packet_get_nd_payload(const struct dp_packet *b) #ifdef DPDK_NETDEV BUILD_ASSERT_DECL(offsetof(struct dp_packet, mbuf) == 0); -static inline void * dp_packet_base(const struct dp_packet *b) +static inline void * +dp_packet_base(const struct dp_packet *b) { return b->mbuf.buf_addr; } -static inline void dp_packet_set_base(struct dp_packet *b, void *d) +static inline void +dp_packet_set_base(struct dp_packet *b, void *d) { b->mbuf.buf_addr = d; } -static inline uint32_t dp_packet_size(const struct dp_packet *b) +static inline uint32_t +dp_packet_size(const struct dp_packet *b) { return b->mbuf.pkt_len; } -static inline void dp_packet_set_size(struct dp_packet *b, uint32_t v) +static inline void +dp_packet_set_size(struct dp_packet *b, uint32_t v) { /* netdev-dpdk does not currently support segmentation; consequently, for * all intents and purposes, 'data_len' (16 bit) and 'pkt_len' (32 bit) may @@ -388,102 +410,112 @@ static inline void dp_packet_set_size(struct dp_packet *b, uint32_t v) * On the datapath, it is expected that the size of packets * (and thus 'v') will always be <= UINT16_MAX; this means that there is no * loss of accuracy in assigning 'v' to 'data_len'. - * - * However, control ofpbufs may well be larger than UINT16_MAX (i.e. 'v' > - * UINT16_MAX); even though the value is truncated when assigned to - * 'data_len', loss of accuracy is avoided in this situation by using - * 'pkt_len' to represent the packet size. */ b->mbuf.data_len = (uint16_t)v; /* Current seg length. */ b->mbuf.pkt_len = v; /* Total length of all segments linked to * this segment. 
*/ - } -static inline uint16_t __packet_data(const struct dp_packet *b) +static inline uint16_t +__packet_data(const struct dp_packet *b) { return b->mbuf.data_off; } -static inline void __packet_set_data(struct dp_packet *b, uint16_t v) +static inline void +__packet_set_data(struct dp_packet *b, uint16_t v) { b->mbuf.data_off = v; } -static inline uint16_t dp_packet_get_allocated(const struct dp_packet *b) +static inline uint16_t +dp_packet_get_allocated(const struct dp_packet *b) { return b->mbuf.buf_len; } -static inline void dp_packet_set_allocated(struct dp_packet *b, uint16_t s) +static inline void +dp_packet_set_allocated(struct dp_packet *b, uint16_t s) { b->mbuf.buf_len = s; } #else -static inline void * dp_packet_base(const struct dp_packet *b) +static inline void * +dp_packet_base(const struct dp_packet *b) { return b->base_; } -static inline void dp_packet_set_base(struct dp_packet *b, void *d) +static inline void +dp_packet_set_base(struct dp_packet *b, void *d) { b->base_ = d; } -static inline uint32_t dp_packet_size(const struct dp_packet *b) +static inline uint32_t +dp_packet_size(const struct dp_packet *b) { return b->size_; } -static inline void dp_packet_set_size(struct dp_packet *b, uint32_t v) +static inline void +dp_packet_set_size(struct dp_packet *b, uint32_t v) { b->size_ = v; } -static inline uint16_t __packet_data(const struct dp_packet *b) +static inline uint16_t +__packet_data(const struct dp_packet *b) { return b->data_ofs; } -static inline void __packet_set_data(struct dp_packet *b, uint16_t v) +static inline void +__packet_set_data(struct dp_packet *b, uint16_t v) { b->data_ofs = v; } -static inline uint16_t dp_packet_get_allocated(const struct dp_packet *b) +static inline uint16_t +dp_packet_get_allocated(const struct dp_packet *b) { return b->allocated_; } -static inline void dp_packet_set_allocated(struct dp_packet *b, uint16_t s) +static inline void +dp_packet_set_allocated(struct dp_packet *b, uint16_t s) { b->allocated_ = s; } 
#endif -static inline void * dp_packet_data(const struct dp_packet *b) +static inline void * +dp_packet_data(const struct dp_packet *b) { - return __packet_data(b) != UINT16_MAX ? - (char *)dp_packet_base(b) + __packet_data(b) : NULL; + return __packet_data(b) != UINT16_MAX + ? (char *) dp_packet_base(b) + __packet_data(b) : NULL; } -static inline void dp_packet_set_data(struct dp_packet *b, void *data) +static inline void +dp_packet_set_data(struct dp_packet *b, void *data) { if (data) { - __packet_set_data(b, (char *)data - (char *)dp_packet_base(b)); + __packet_set_data(b, (char *) data - (char *) dp_packet_base(b)); } else { __packet_set_data(b, UINT16_MAX); } } -static inline void dp_packet_reset_packet(struct dp_packet *b, int off) +static inline void +dp_packet_reset_packet(struct dp_packet *b, int off) { dp_packet_set_size(b, dp_packet_size(b) - off); dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off)); b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; } -static inline uint32_t dp_packet_get_rss_hash(struct dp_packet *p) +static inline uint32_t +dp_packet_get_rss_hash(struct dp_packet *p) { #ifdef DPDK_NETDEV return p->mbuf.hash.rss; @@ -492,8 +524,8 @@ static inline uint32_t dp_packet_get_rss_hash(struct dp_packet *p) #endif } -static inline void dp_packet_set_rss_hash(struct dp_packet *p, - uint32_t hash) +static inline void +dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash) { #ifdef DPDK_NETDEV p->mbuf.hash.rss = hash; From efa2bcbb358b0cb71524d5ad329edb495c123d11 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:49 -0700 Subject: [PATCH 018/146] dpif-netdev: Store pkt_metadata structure in dp_netdev_port. Initializing a struct pkt_metadata for every packet can be surprisingly expensive. It's much faster to keep a copy for each port and copying it on each packet. 
Suggested-by: Pravin Shelar Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dpif-netdev.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index f1d65f57d..34d9d31a9 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -235,9 +235,9 @@ enum pmd_cycles_counter_type { /* A port in a netdev-based datapath. */ struct dp_netdev_port { - struct cmap_node node; /* Node in dp_netdev's 'ports'. */ - odp_port_t port_no; + struct pkt_metadata md; struct netdev *netdev; + struct cmap_node node; /* Node in dp_netdev's 'ports'. */ struct netdev_saved_flags *sf; struct netdev_rxq **rxq; struct ovs_refcount ref_cnt; @@ -1071,7 +1071,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, } } port = xzalloc(sizeof *port); - port->port_no = port_no; + port->md = PKT_METADATA_INITIALIZER(port_no); port->netdev = netdev; port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); port->type = xstrdup(type); @@ -1176,7 +1176,7 @@ dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no) struct dp_netdev_port *port; CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) { - if (port->port_no == port_no) { + if (port->md.in_port.odp_port == port_no) { return port; } } @@ -1286,7 +1286,8 @@ static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) OVS_REQUIRES(dp->port_mutex) { - cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no)); + cmap_remove(&dp->ports, &port->node, + hash_odp_port(port->md.in_port.odp_port)); seq_change(dp->port_seq); if (netdev_is_pmd(port->netdev)) { int numa_id = netdev_get_numa_id(port->netdev); @@ -1308,7 +1309,7 @@ answer_port_query(const struct dp_netdev_port *port, { dpif_port->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->type = xstrdup(port->type); - dpif_port->port_no = port->port_no; + dpif_port->port_no = port->md.in_port.odp_port; } static int @@ 
-1435,7 +1436,7 @@ dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, state->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->name = state->name; dpif_port->type = port->type; - dpif_port->port_no = port->port_no; + dpif_port->port_no = port->md.in_port.odp_port; retval = 0; } else { @@ -2513,7 +2514,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, /* XXX: initialize md in netdev implementation. */ for (i = 0; i < cnt; i++) { - packets[i]->md = PKT_METADATA_INITIALIZER(port->port_no); + packets[i]->md = port->md; } cycles_count_start(pmd); dp_netdev_input(pmd, packets, cnt); @@ -3611,12 +3612,12 @@ dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, } /* Remove old port. */ - cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no)); + cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port)); ovsrcu_postpone(free, old_port); /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */ new_port = xmemdup(old_port, sizeof *old_port); - new_port->port_no = port_no; + new_port->md.in_port.odp_port = port_no; cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no)); seq_change(dp->port_seq); @@ -3647,7 +3648,7 @@ dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED, ovs_mutex_lock(&dp->port_mutex); if (get_port_by_name(dp, argv[2], &port)) { unixctl_command_reply_error(conn, "unknown port"); - } else if (port->port_no == ODPP_LOCAL) { + } else if (port->md.in_port.odp_port == ODPP_LOCAL) { unixctl_command_reply_error(conn, "can't delete local port"); } else { do_del_port(dp, port); From 11e5cf1f9043637613502f4477c8fd5ce6d65f12 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:50 -0700 Subject: [PATCH 019/146] dpif-netdev: Store batch pointer in dp_netdev_flow. The userspace datapath 1. receives a batch of packets. 2. finds a 'netdev_flow' (megaflow) for each packet. 3. 
groups the packets in output batches based on the 'netdev_flow'. Until now the grouping (2) was done using a simple algorithm with an O(N^2) runtime, where N is the number of distinct megaflows of the packets in the incoming batch. This could quickly become a bottleneck, even with a small number of megaflows. With this commit the datapath simply stores in the 'netdev_flow' (the megaflow) a pointer to the output batch, if one has been created for the current input batch. The pointer will be cleared when the output batch is sent. In a simple phy2phy test with 128 megaflows the throughput is more than doubled. The reason that stopped us from doing this change was that the 'netdev_flow' memory was shared between multiple threads: this is no longer the case with the per-thread classifier. Also, this commit reorders struct dp_netdev_flow to group together the members used in the fastpath. Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dpif-netdev.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 34d9d31a9..e09ffc2cb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -290,13 +290,11 @@ struct dp_netdev_flow_stats { * requires synchronization, as noted in more detail below. */ struct dp_netdev_flow { - bool dead; - + const struct flow flow; /* Unmasked flow that created this entry. */ /* Hash table index by unmasked flow. */ const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */ /* 'flow_table'. */ const ovs_u128 ufid; /* Unique flow identifier. */ - const struct flow flow; /* Unmasked flow that created this entry. */ const int pmd_id; /* The 'core_id' of pmd thread owning this */ /* flow. */ @@ -306,12 +304,20 @@ struct dp_netdev_flow { * reference. */ struct ovs_refcount ref_cnt; + bool dead; + /* Statistics. */ struct dp_netdev_flow_stats stats; /* Actions.
*/ OVSRCU_TYPE(struct dp_netdev_actions *) actions; + /* While processing a group of input packets, the datapath uses the next + * member to store a pointer to the output batch for the flow. It is + * reset after the batch has been sent out (See dp_netdev_queue_batches(), + * packet_batch_init() and packet_batch_execute()). */ + struct packet_batch *batch; + /* Packet classification. */ struct dpcls_rule cr; /* In owning dp_netdev's 'cls'. */ /* 'cr' must be the last member. */ @@ -1975,6 +1981,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); memset(&flow->stats, 0, sizeof flow->stats); flow->dead = false; + flow->batch = NULL; *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id; *CONST_CAST(struct flow *, &flow->flow) = match->flow; *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid; @@ -3043,8 +3050,9 @@ packet_batch_update(struct packet_batch *batch, struct dp_packet *packet, static inline void packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow) { - batch->flow = flow; + flow->batch = batch; + batch->flow = flow; batch->packet_count = 0; batch->byte_count = 0; batch->tcp_flags = 0; @@ -3059,7 +3067,8 @@ packet_batch_execute(struct packet_batch *batch, struct dp_netdev_actions *actions; struct dp_netdev_flow *flow = batch->flow; - dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count, + flow->batch = NULL; + dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count, batch->tcp_flags, now); actions = dp_netdev_flow_get_actions(flow); @@ -3076,25 +3085,19 @@ dp_netdev_queue_batches(struct dp_packet *pkt, struct packet_batch *batches, size_t *n_batches, size_t max_batches) { - struct packet_batch *batch = NULL; - int j; + struct packet_batch *batch; if (OVS_UNLIKELY(!flow)) { return false; } - /* XXX: This O(n^2) algortihm makes sense if we're operating under the - * assumption that the number of distinct flows (and therefore the - * number of distinct 
batches) is quite small. If this turns out not - * to be the case, it may make sense to pre sort based on the - * netdev_flow pointer. That done we can get the appropriate batching - * in O(n * log(n)) instead. */ - for (j = *n_batches - 1; j >= 0; j--) { - if (batches[j].flow == flow) { - batch = &batches[j]; - packet_batch_update(batch, pkt, mf); - return true; - } + + batch = flow->batch; + + if (OVS_LIKELY(batch)) { + packet_batch_update(batch, pkt, mf); + return true; } + if (OVS_UNLIKELY(*n_batches >= max_batches)) { return false; } From 8aaa125dab663595329063560b33fe7d62839b7a Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 18 May 2015 10:47:51 -0700 Subject: [PATCH 020/146] dpif-netdev: Share emc and fast path output batches. Until now the exact match cache processing was able to handle only four megaflows. The rest of the packets were passed to the megaflow classifier. The limit was arbitrarily set to four also because the algorithm used to group packets in output batches didn't perform well with a lot of megaflows. After changing the algorithm and after some performance testing it seems much better just to share the same output batches between the exact match cache and the megaflow classifier.
Signed-off-by: Daniele Di Proietto Acked-by: Pravin B Shelar --- lib/dpif-netdev.c | 89 +++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 54 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e09ffc2cb..477e32c23 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3061,7 +3061,6 @@ packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow) static inline void packet_batch_execute(struct packet_batch *batch, struct dp_netdev_pmd_thread *pmd, - enum dp_stat_type hit_type, long long now) { struct dp_netdev_actions *actions; @@ -3075,37 +3074,23 @@ packet_batch_execute(struct packet_batch *batch, dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true, actions->actions, actions->size); - - dp_netdev_count_packet(pmd, hit_type, batch->packet_count); } -static inline bool +static inline void dp_netdev_queue_batches(struct dp_packet *pkt, struct dp_netdev_flow *flow, const struct miniflow *mf, - struct packet_batch *batches, size_t *n_batches, - size_t max_batches) + struct packet_batch *batches, size_t *n_batches) { - struct packet_batch *batch; - - if (OVS_UNLIKELY(!flow)) { - return false; - } - - batch = flow->batch; + struct packet_batch *batch = flow->batch; if (OVS_LIKELY(batch)) { packet_batch_update(batch, pkt, mf); - return true; - } - - if (OVS_UNLIKELY(*n_batches >= max_batches)) { - return false; + return; } batch = &batches[(*n_batches)++]; packet_batch_init(batch, flow); packet_batch_update(batch, pkt, mf); - return true; } static inline void @@ -3117,24 +3102,22 @@ dp_packet_swap(struct dp_packet **a, struct dp_packet **b) } /* Try to process all ('cnt') the 'packets' using only the exact match cache - * 'flow_cache'. If a flow is not found for a packet 'packets[i]', or if there - * is no matching batch for a packet's flow, the miniflow is copied into 'keys' - * and the packet pointer is moved at the beginning of the 'packets' array. + * 'flow_cache'. 
If a flow is not found for a packet 'packets[i]', the + * miniflow is copied into 'keys' and the packet pointer is moved at the + * beginning of the 'packets' array. * * The function returns the number of packets that needs to be processed in the * 'packets' array (they have been moved to the beginning of the vector). */ static inline size_t emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, - size_t cnt, struct netdev_flow_key *keys, long long now) + size_t cnt, struct netdev_flow_key *keys, + struct packet_batch batches[], size_t *n_batches) { - struct netdev_flow_key key; - struct packet_batch batches[4]; struct emc_cache *flow_cache = &pmd->flow_cache; - size_t n_batches, i; - size_t notfound_cnt = 0; + struct netdev_flow_key key; + size_t i, notfound_cnt = 0; - n_batches = 0; miniflow_initialize(&key.mf, key.buf); for (i = 0; i < cnt; i++) { struct dp_netdev_flow *flow; @@ -3149,9 +3132,10 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, key.hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.mf); flow = emc_lookup(flow_cache, &key); - if (OVS_UNLIKELY(!dp_netdev_queue_batches(packets[i], flow, &key.mf, - batches, &n_batches, - ARRAY_SIZE(batches)))) { + if (OVS_LIKELY(flow)) { + dp_netdev_queue_batches(packets[i], flow, &key.mf, batches, + n_batches); + } else { if (i != notfound_cnt) { dp_packet_swap(&packets[i], &packets[notfound_cnt]); } @@ -3160,9 +3144,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, } } - for (i = 0; i < n_batches; i++) { - packet_batch_execute(&batches[i], pmd, DP_STAT_EXACT_HIT, now); - } + dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT, cnt - notfound_cnt); return notfound_cnt; } @@ -3170,7 +3152,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, static inline void fast_path_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, size_t cnt, - struct netdev_flow_key *keys, long long now) + struct 
netdev_flow_key *keys, + struct packet_batch batches[], size_t *n_batches) { #if !defined(__CHECKER__) && !defined(_WIN32) const size_t PKT_ARRAY_SIZE = cnt; @@ -3178,12 +3161,12 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, /* Sparse or MSVC doesn't like variable length array. */ enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; #endif - struct packet_batch batches[PKT_ARRAY_SIZE]; struct dpcls_rule *rules[PKT_ARRAY_SIZE]; struct dp_netdev *dp = pmd->dp; struct emc_cache *flow_cache = &pmd->flow_cache; - size_t n_batches, i; + int miss_cnt = 0, lost_cnt = 0; bool any_miss; + size_t i; for (i = 0; i < cnt; i++) { /* Key length is needed in all the cases, hash computed on demand. */ @@ -3193,7 +3176,6 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) { uint64_t actions_stub[512 / 8], slow_stub[512 / 8]; struct ofpbuf actions, put_actions; - int miss_cnt = 0, lost_cnt = 0; ovs_u128 ufid; ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub); @@ -3265,23 +3247,17 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, ofpbuf_uninit(&actions); ofpbuf_uninit(&put_actions); fat_rwlock_unlock(&dp->upcall_rwlock); - dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt); dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt); } else if (OVS_UNLIKELY(any_miss)) { - int dropped_cnt = 0; - for (i = 0; i < cnt; i++) { if (OVS_UNLIKELY(!rules[i])) { dp_packet_delete(packets[i]); - dropped_cnt++; + lost_cnt++; + miss_cnt++; } } - - dp_netdev_count_packet(pmd, DP_STAT_MISS, dropped_cnt); - dp_netdev_count_packet(pmd, DP_STAT_LOST, dropped_cnt); } - n_batches = 0; for (i = 0; i < cnt; i++) { struct dp_packet *packet = packets[i]; struct dp_netdev_flow *flow; @@ -3293,13 +3269,12 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); emc_insert(flow_cache, &keys[i], flow); - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, - &n_batches, 
ARRAY_SIZE(batches)); + dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); } - for (i = 0; i < n_batches; i++) { - packet_batch_execute(&batches[i], pmd, DP_STAT_MASKED_HIT, now); - } + dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt); + dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt); + dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt); } static void @@ -3313,12 +3288,18 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd, enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; #endif struct netdev_flow_key keys[PKT_ARRAY_SIZE]; + struct packet_batch batches[PKT_ARRAY_SIZE]; long long now = time_msec(); - size_t newcnt; + size_t newcnt, n_batches, i; - newcnt = emc_processing(pmd, packets, cnt, keys, now); + n_batches = 0; + newcnt = emc_processing(pmd, packets, cnt, keys, batches, &n_batches); if (OVS_UNLIKELY(newcnt)) { - fast_path_processing(pmd, packets, newcnt, keys, now); + fast_path_processing(pmd, packets, newcnt, keys, batches, &n_batches); + } + + for (i = 0; i < n_batches; i++) { + packet_batch_execute(&batches[i], pmd, now); } } From 2fe3020f5cb62033218c273b7c92ed9203983d62 Mon Sep 17 00:00:00 2001 From: Alin Serdean Date: Mon, 4 May 2015 16:44:49 +0000 Subject: [PATCH 021/146] datapath-windows: Fix warning from the powershell module This patch fixes the warning when datapath-windows/misc/OVS.psm1 is imported. 
Signed-off-by: Alin Gabriel Serdean Reported-by: Hemanth Kumar Mantri Reported-at: https://github.com/openvswitch/ovs-issues/issues/69 Acked-by: Eitan Eliahu Signed-off-by: Gurucharan Shetty --- datapath-windows/misc/OVS.psm1 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datapath-windows/misc/OVS.psm1 b/datapath-windows/misc/OVS.psm1 index d6b6b0047..a8ffcaefd 100644 --- a/datapath-windows/misc/OVS.psm1 +++ b/datapath-windows/misc/OVS.psm1 @@ -1,5 +1,5 @@ <# -Copyright 2014 Cloudbase Solutions Srl +Copyright 2014, 2015 Cloudbase Solutions Srl Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ function Set-VMNetworkAdapterOVSPort $retVal = $vsms.ModifyResourceSettings(@($sd.GetText(1))) try { - Check-WMIReturnValue $retVal + CheckWMIReturnValue $retVal } catch { @@ -142,7 +142,7 @@ function Get-VMNetworkAdapterWithOVSPort } } -function Check-WMIReturnValue($retVal) +function CheckWMIReturnValue($retVal) { if ($retVal.ReturnValue -ne 0) { @@ -206,3 +206,5 @@ function Set-VMNetworkAdapterOVSPortDirect $vnic[0] | Set-VMNetworkAdapterOVSPort -OVSPortName $OVSPortName } } + +Export-ModuleMember -function Set-*, Get-* From fa7de3220ce79ffe293d84cc9c98b1a88980273b Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 18 May 2015 18:10:29 -0700 Subject: [PATCH 022/146] ovs-ofctl: Always prints recirc_id in decimal The output of 'ovs-ofctl dump-flows' command prints recirc_id in decimal in action parts of the output, while prints that in hex in matching parts of the same output. This patch fixes the inconsistency by always printing recirc_id values in decimal. 
Reported-by: Justin Pettit Signed-off-by: Andy Zhou Acked-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/match.c | 24 +++++++++++++++++++----- tests/ofproto-dpif.at | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/match.c b/lib/match.c index 7d0b4095f..b1550842a 100644 --- a/lib/match.c +++ b/lib/match.c @@ -843,16 +843,30 @@ format_be32_masked(struct ds *s, const char *name, } static void -format_uint32_masked(struct ds *s, const char *name, - uint32_t value, uint32_t mask) +format_uint32_masked__(struct ds *s, const char *name, + uint32_t value, uint32_t mask, const char *format) { if (mask) { - ds_put_format(s, "%s=%#"PRIx32, name, value); + ds_put_format(s, format, name, value); if (mask != UINT32_MAX) { ds_put_format(s, "/%#"PRIx32, mask); } ds_put_char(s, ','); } + +} +static void +format_uint32_masked(struct ds *s, const char *name, + uint32_t value, uint32_t mask) +{ + format_uint32_masked__(s, name, value, mask, "%s=%#"PRIx32); +} + +static void +format_decimal_uint32_masked(struct ds *s, const char *name, + uint32_t value, uint32_t mask) +{ + format_uint32_masked__(s, name, value, mask, "%s=%"PRIu32); } static void @@ -921,8 +935,8 @@ match_format(const struct match *match, struct ds *s, int priority) format_uint32_masked(s, "pkt_mark", f->pkt_mark, wc->masks.pkt_mark); if (wc->masks.recirc_id) { - format_uint32_masked(s, "recirc_id", f->recirc_id, - wc->masks.recirc_id); + format_decimal_uint32_masked(s, "recirc_id", f->recirc_id, + wc->masks.recirc_id); } if (wc->masks.dp_hash) { diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 139dfdd6b..3361dc2d8 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -186,7 +186,7 @@ table=0 priority=2 in_port=5 dl_vlan=1 actions=drop AT_CHECK([ovs-ofctl add-flows br-int flows.txt]) # Sends a packet to trigger recirculation. -# Should generate recirc_id(0x2),dp_hash(0xc1261ba2/0xff). +# Should generate recirc_id(2),dp_hash(0xc1261ba2/0xff). 
AT_CHECK([ovs-appctl netdev-dummy/receive p5 "in_port(5),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1)"]) # Collects flow stats. @@ -195,7 +195,7 @@ AT_CHECK([ovs-appctl revalidator/purge], [0]) # Checks the flow stats in br1, should only be one flow with non-zero # 'n_packets' from internal table. AT_CHECK([ovs-appctl bridge/dump-flows br1 | ofctl_strip | grep -- "n_packets" | grep -- "table_id" | sed -e 's/dp_hash=0x[[0-9a-f]][[0-9a-f]]*/dp_hash=0x0/' -e 's/output:[[0-9]][[0-9]]*/output/'], [0], [dnl -table_id=254, n_packets=1, n_bytes=64, priority=20,recirc_id=0x2,dp_hash=0x0/0xff,actions=output +table_id=254, n_packets=1, n_bytes=64, priority=20,recirc_id=2,dp_hash=0x0/0xff,actions=output ]) # Checks the flow stats in br-int, should be only one match. From bce01e3a89ac4b05d9e81408aa57717f2776f0be Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Mon, 18 May 2015 08:49:24 -0700 Subject: [PATCH 023/146] netdev-dpdk: Fix sparse warnings. These are all minor style issues. Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- lib/netdev-dpdk.c | 22 ++++++++++++++-------- tests/dpdk/ring_client.c | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 505ab751e..5f8c60f26 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -102,7 +102,7 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) #define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ /* Character device cuse_dev_name. */ -char *cuse_dev_name = NULL; +static char *cuse_dev_name = NULL; /* * Maximum amount of time in micro seconds to try and enqueue to vhost. @@ -173,7 +173,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex) /* This mutex must be used by non pmd threads when allocating or freeing * mbufs through mempools. 
Since dpdk_queue_pkts() and dpdk_queue_flush() may * use mempools, a non pmd thread should hold this mutex while calling them */ -struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER; +static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER; struct dpdk_mp { struct rte_mempool *mp; @@ -589,7 +589,7 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[], } cport = dev_name + strlen(prefix); - *port_no = strtol(cport, 0, 0); /* string must be null terminated */ + *port_no = strtol(cport, NULL, 0); /* string must be null terminated */ return 0; } @@ -1004,8 +1004,14 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts, int cnt) OVS_NO_THREAD_SAFETY_ANALYSIS { +#if !defined(__CHECKER__) && !defined(_WIN32) + const size_t PKT_ARRAY_SIZE = cnt; +#else + /* Sparse or MSVC doesn't like variable length array. */ + enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; +#endif struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - struct rte_mbuf *mbufs[cnt]; + struct rte_mbuf *mbufs[PKT_ARRAY_SIZE]; int dropped = 0; int newcnt = 0; int i; @@ -1650,7 +1656,7 @@ netdev_dpdk_get_virtio(const struct netdev_dpdk *dev) * These callbacks allow virtio-net devices to be added to vhost ports when * configuration has been fully complete. 
*/ -const struct virtio_net_device_ops virtio_net_device_ops = +static const struct virtio_net_device_ops virtio_net_device_ops = { .new_device = new_device, .destroy_device = destroy_device, @@ -1957,7 +1963,7 @@ dpdk_init(int argc, char **argv) return result + 1 + base; } -const struct netdev_class dpdk_class = +static const struct netdev_class dpdk_class = NETDEV_DPDK_CLASS( "dpdk", NULL, @@ -1971,7 +1977,7 @@ const struct netdev_class dpdk_class = netdev_dpdk_get_status, netdev_dpdk_rxq_recv); -const struct netdev_class dpdk_ring_class = +static const struct netdev_class dpdk_ring_class = NETDEV_DPDK_CLASS( "dpdkr", NULL, @@ -1985,7 +1991,7 @@ const struct netdev_class dpdk_ring_class = netdev_dpdk_get_status, netdev_dpdk_rxq_recv); -const struct netdev_class dpdk_vhost_class = +static const struct netdev_class dpdk_vhost_class = NETDEV_DPDK_CLASS( "dpdkvhost", dpdk_vhost_class_init, diff --git a/tests/dpdk/ring_client.c b/tests/dpdk/ring_client.c index 97d32fccd..aeaeaca61 100644 --- a/tests/dpdk/ring_client.c +++ b/tests/dpdk/ring_client.c @@ -131,7 +131,7 @@ parse_app_args(int argc, char *argv[]) char **argvopt = argv; const char *progname = NULL; static struct option lgopts[] = { - {NULL, 0, 0, 0 } + {NULL, 0, NULL, 0 } }; progname = argv[0]; From cd159f1a82674eca96e8e2c0f184e3abac92172d Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Sat, 16 May 2015 08:18:20 -0700 Subject: [PATCH 024/146] dpdk: Ditch MAX_PKT_BURST macro. The MAX_PKT_BURST and NETDEV_MAX_RX_BATCH macros had a confusing relationship. They basically purport to do the same thing, making it unclear which is the source of truth. Furthermore, while NETDEV_MAX_RX_BATCH was 256, MAX_PKT_BURST was 32, meaning we never process a batch larger than 32 packets further adding to the confusion. This patch resolves the issue by removing MAX_PKT_BURST completely, and shrinking the new NETDEV_MAX_BURST macro to only 32. 
This should have no change in the execution path except shrinking a couple of structs and memory allocations (can't hurt). Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- lib/dpif-netdev.c | 16 ++++++++-------- lib/netdev-dpdk.c | 9 +++------ lib/netdev.h | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 477e32c23..fb01a02d3 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2508,7 +2508,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_port *port, struct netdev_rxq *rxq) { - struct dp_packet *packets[NETDEV_MAX_RX_BATCH]; + struct dp_packet *packets[NETDEV_MAX_BURST]; int error, cnt; cycles_count_start(pmd); @@ -3035,7 +3035,7 @@ struct packet_batch { struct dp_netdev_flow *flow; - struct dp_packet *packets[NETDEV_MAX_RX_BATCH]; + struct dp_packet *packets[NETDEV_MAX_BURST]; }; static inline void @@ -3159,7 +3159,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, const size_t PKT_ARRAY_SIZE = cnt; #else /* Sparse or MSVC doesn't like variable length array. */ - enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; + enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST }; #endif struct dpcls_rule *rules[PKT_ARRAY_SIZE]; struct dp_netdev *dp = pmd->dp; @@ -3285,7 +3285,7 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd, const size_t PKT_ARRAY_SIZE = cnt; #else /* Sparse or MSVC doesn't like variable length array. 
*/ - enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; + enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST }; #endif struct netdev_flow_key keys[PKT_ARRAY_SIZE]; struct packet_batch batches[PKT_ARRAY_SIZE]; @@ -3382,7 +3382,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt, case OVS_ACTION_ATTR_TUNNEL_PUSH: if (*depth < MAX_RECIRC_DEPTH) { - struct dp_packet *tnl_pkt[NETDEV_MAX_RX_BATCH]; + struct dp_packet *tnl_pkt[NETDEV_MAX_BURST]; int err; if (!may_steal) { @@ -3408,7 +3408,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt, p = dp_netdev_lookup_port(dp, portno); if (p) { - struct dp_packet *tnl_pkt[NETDEV_MAX_RX_BATCH]; + struct dp_packet *tnl_pkt[NETDEV_MAX_BURST]; int err; if (!may_steal) { @@ -3470,7 +3470,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt, case OVS_ACTION_ATTR_RECIRC: if (*depth < MAX_RECIRC_DEPTH) { - struct dp_packet *recirc_pkts[NETDEV_MAX_RX_BATCH]; + struct dp_packet *recirc_pkts[NETDEV_MAX_BURST]; if (!may_steal) { dp_netdev_clone_pkt_batch(recirc_pkts, packets, cnt); @@ -3829,7 +3829,7 @@ dpcls_lookup(const struct dpcls *cls, const struct netdev_flow_key keys[], #if !defined(__CHECKER__) && !defined(_WIN32) const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS); #else - enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_RX_BATCH, MAP_BITS) }; + enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) }; #endif map_type maps[N_MAPS]; struct dpcls_subtable *subtable; diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 5f8c60f26..124b11506 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -99,8 +99,6 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) #define TX_HTHRESH 0 /* Default values of TX host threshold reg. */ #define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */ -#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ - /* Character device cuse_dev_name. 
*/ static char *cuse_dev_name = NULL; @@ -862,7 +860,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_, nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid, vhost_dev->dpdk_mp->mp, (struct rte_mbuf **)packets, - MAX_PKT_BURST); + NETDEV_MAX_BURST); if (!nb_rx) { return EAGAIN; } @@ -889,8 +887,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, nb_rx = rte_eth_rx_burst(rx->port_id, rxq_->queue_id, (struct rte_mbuf **) packets, - MIN((int) NETDEV_MAX_RX_BATCH, - (int) MAX_PKT_BURST)); + NETDEV_MAX_BURST); if (!nb_rx) { return EAGAIN; } @@ -1008,7 +1005,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts, const size_t PKT_ARRAY_SIZE = cnt; #else /* Sparse or MSVC doesn't like variable length array. */ - enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH }; + enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST }; #endif struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); struct rte_mbuf *mbufs[PKT_ARRAY_SIZE]; diff --git a/lib/netdev.h b/lib/netdev.h index 71c0af1b5..9d412ee6c 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -338,7 +338,7 @@ typedef void netdev_dump_queue_stats_cb(unsigned int queue_id, int netdev_dump_queue_stats(const struct netdev *, netdev_dump_queue_stats_cb *, void *aux); -enum { NETDEV_MAX_RX_BATCH = 256 }; /* Maximum number packets in rx_recv() batch. */ +enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */ extern struct seq *tnl_conf_seq; #ifdef __cplusplus From ccf7b34ead19f820c5f90fedd1b6caafb7dd9db1 Mon Sep 17 00:00:00 2001 From: Dan McGregor Date: Tue, 19 May 2015 12:24:26 -0700 Subject: [PATCH 025/146] netdev-bsd: Include net/bpf.h. The documentation says it is required to use bpf ioctls on both NetBSD and FreeBSD. It causes a compile time failure on FreeBSD 10. 
Signed-off-by: Dan McGregor Signed-off-by: Ben Pfaff --- lib/netdev-bsd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index b3075dc82..03101a8db 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include From 1190bedaeaaa11c9098fc1ddbe5b01ec7efbc8f4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 20 May 2015 18:46:00 -0700 Subject: [PATCH 026/146] AUTHORS: Add Dan McGregor. Signed-off-by: Ben Pfaff --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 891cfe976..5178b4346 100644 --- a/AUTHORS +++ b/AUTHORS @@ -35,6 +35,7 @@ Chuck Short zulcss@ubuntu.com Cong Wang amwang@redhat.com Damien Millescamps damien.millescamps@6wind.com Dan Carpenter dan.carpenter@oracle.com +Dan McGregor dan.mcgregor@usask.ca Dan Wendlandt dan@nicira.com Daniel Borkmann dborkman@redhat.com Daniel Hiltgen daniel@netkine.com From fc82e877efc03400e65b44588fb40eb507a98bf4 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 13 May 2015 14:54:56 +0100 Subject: [PATCH 027/146] dpif-netdev: Increase the number of EMC entries Prior to this commit, the number of possible entries in the Exact Match Cache stood at 1024 per thread exacting to 0.18Mb. A typical server system will have 2.5Mb cache per core meaning a larger EMC will comfortably fit in. This patch increases the number of entries to 8192 per thread (1.4Mb) which in turn yields improved throughput when processing multiple flows of traffic. Signed-off-by: Ciara Loftus Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto Acked-by: Ethan Jackson --- lib/dpif-netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index fb01a02d3..b4a42eb11 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -117,7 +117,7 @@ struct netdev_flow_key { * If dp_netdev_input is not called from a pmd thread, a mutex is used. 
*/ -#define EM_FLOW_HASH_SHIFT 10 +#define EM_FLOW_HASH_SHIFT 13 #define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT) #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) #define EM_FLOW_HASH_SEGS 2 From 9154f798ef0011ea9d1d7fb1dc91b51b60da82d3 Mon Sep 17 00:00:00 2001 From: Kevin Traynor Date: Thu, 21 May 2015 17:26:48 +0100 Subject: [PATCH 028/146] netdev-dpdk: Use default NIC configuration. This patch simplifies Rx/Tx NIC configuration by removing custom values and using the defaults provided by the DPDK PMDs. This also enables Rx vectorisation which improves performance. Signed-off-by: Kevin Traynor Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- lib/netdev-dpdk.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 124b11506..02a003280 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -90,15 +90,6 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) #define NIC_PORT_RX_Q_SIZE 2048 /* Size of Physical NIC RX Queue, Max (n+32<=4096)*/ #define NIC_PORT_TX_Q_SIZE 2048 /* Size of Physical NIC TX Queue, Max (n+32<=4096)*/ -/* XXX: Needs per NIC value for these constants. */ -#define RX_PTHRESH 32 /* Default values of RX prefetch threshold reg. */ -#define RX_HTHRESH 32 /* Default values of RX host threshold reg. */ -#define RX_WTHRESH 16 /* Default values of RX write-back threshold reg. */ - -#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */ -#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */ -#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */ - /* Character device cuse_dev_name. 
*/ static char *cuse_dev_name = NULL; @@ -128,25 +119,6 @@ static const struct rte_eth_conf port_conf = { }, }; -static const struct rte_eth_rxconf rx_conf = { - .rx_thresh = { - .pthresh = RX_PTHRESH, - .hthresh = RX_HTHRESH, - .wthresh = RX_WTHRESH, - }, -}; - -static const struct rte_eth_txconf tx_conf = { - .tx_thresh = { - .pthresh = TX_PTHRESH, - .hthresh = TX_HTHRESH, - .wthresh = TX_WTHRESH, - }, - .tx_free_thresh = 0, - .tx_rs_thresh = 0, - .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS|ETH_TXQ_FLAGS_NOOFFLOADS, -}; - enum { MAX_TX_QUEUE_LEN = 384 }; enum { DPDK_RING_SIZE = 256 }; BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE)); @@ -449,7 +421,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) for (i = 0; i < dev->up.n_txq; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, - dev->socket_id, &tx_conf); + dev->socket_id, NULL); if (diag) { VLOG_ERR("eth dev tx queue setup error %d",diag); return -diag; @@ -459,7 +431,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) for (i = 0; i < dev->up.n_rxq; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, - &rx_conf, dev->dpdk_mp->mp); + NULL, dev->dpdk_mp->mp); if (diag) { VLOG_ERR("eth dev rx queue setup error %d",diag); return -diag; From 603f2ce04d000892cc4db841cff7b3b3fc95bb6c Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Wed, 20 May 2015 16:55:17 -0700 Subject: [PATCH 029/146] dpif-netdev: Clear flow batches before execute. When executing actions, it's possible a recirculation will occur causing dp_netdev_input() to be called multiple times. If the batch pointers embedded in dp_netdev_flow aren't cleared, it's possible packets after the recirculation will be reinserted into a batch associated with the original lookup. This could be very bad. This patch fixes the problem by zeroing out flow batch pointers before calling packet_batch_execute(). 
This probably has a slightly negative performance impact, though I haven't tried it. Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- lib/dpif-netdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index b4a42eb11..ff583e7b1 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3066,7 +3066,6 @@ packet_batch_execute(struct packet_batch *batch, struct dp_netdev_actions *actions; struct dp_netdev_flow *flow = batch->flow; - flow->batch = NULL; dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count, batch->tcp_flags, now); @@ -3298,6 +3297,10 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd, fast_path_processing(pmd, packets, newcnt, keys, batches, &n_batches); } + for (i = 0; i < n_batches; i++) { + batches[i].flow->batch = NULL; + } + for (i = 0; i < n_batches; i++) { packet_batch_execute(&batches[i], pmd, now); } From 048963aa8507f3627bf3c9b4cbe4be4a46845b42 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Wed, 22 Apr 2015 19:22:52 +0100 Subject: [PATCH 030/146] dpif-netdev: Reset RSS hash when recirculating. Having the same RSS hash after recirculation can cause unnecessary collisions in the exact match cache. A simple solution is to rehash it with the recirculation depth if it is non-zero. 
Suggested-by: Ethan Jackson Signed-off-by: Daniele Di Proietto Signed-off-by: Ethan Jackson Acked-by: Ethan Jackson --- lib/dpif-netdev.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ff583e7b1..22fba7e6d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3015,16 +3015,24 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_, } static inline uint32_t -dpif_netdev_packet_get_dp_hash(struct dp_packet *packet, - const struct miniflow *mf) +dpif_netdev_packet_get_rss_hash(struct dp_packet *packet, + const struct miniflow *mf) { - uint32_t hash; + uint32_t hash, recirc_depth; hash = dp_packet_get_rss_hash(packet); if (OVS_UNLIKELY(!hash)) { hash = miniflow_hash_5tuple(mf, 0); dp_packet_set_rss_hash(packet, hash); } + + /* The RSS hash must account for the recirculation depth to avoid + * collisions in the exact match cache */ + recirc_depth = *recirc_depth_get_unsafe(); + if (OVS_UNLIKELY(recirc_depth)) { + hash = hash_finish(hash, recirc_depth); + dp_packet_set_rss_hash(packet, hash); + } return hash; } @@ -3128,7 +3136,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, miniflow_extract(packets[i], &key.mf); key.len = 0; /* Not computed yet. */ - key.hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.mf); + key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); flow = emc_lookup(flow_cache, &key); if (OVS_LIKELY(flow)) { From b940b3d79367b3661479367399e353dabcf9c569 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 22 May 2015 11:22:40 -0700 Subject: [PATCH 031/146] datapath: Support masked set actions. OVS kernel module support for masked set actions is already upstream in Linux (commit 83d2b9ba1abca241df44a502b6da950a25856b5b). This patch adds the same for the OVS tree kernel module. The existing set action sets many fields at once. 
When only a subset of the IP header fields, for example, should be modified, all the IP fields need to be exact matched so that the other field values can be copied to the set action. A masked set action allows modification of an arbitrary subset of the supported header bits without requiring the rest to be matched. Masked set action is now supported for all writeable key types, except for the tunnel key. The set tunnel action is an exception as any input tunnel info is cleared before action processing starts, so there is no tunnel info to mask. The kernel module converts all (non-tunnel) set actions to masked set actions. This makes action processing more uniform, and results in less branching and duplicating the action processing code. When returning actions to userspace, the conversion is inverted. We use a kernel internal action code to be able to tell the userspace provided and converted masked set actions apart. Signed-off-by: Jarno Rajahalme Acked-by: Jesse Gross --- datapath/actions.c | 370 +++++++++++------- datapath/flow_netlink.c | 160 ++++++-- .../linux/compat/include/linux/openvswitch.h | 12 +- 3 files changed, 372 insertions(+), 170 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 98c4376b6..52d7213c9 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -190,10 +190,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, - const __be32 *mpls_lse) +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) + +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + const __be32 *mpls_lse, const __be32 *mask) { __be32 *stack; + __be32 lse; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -201,14 +206,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key 
*key, return err; stack = (__be32 *)skb_mpls_header(skb); + lse = MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), *mpls_lse }; + __be32 diff[] = { ~(*stack), lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = *mpls_lse; - key->mpls.top_lse = *mpls_lse; + *stack = lse; + flow_key->mpls.top_lse = lse; return 0; } @@ -237,23 +244,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } -static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ethernet *eth_key) +/* 'src' is already properly masked. */ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + SET_MASKED(dst[0], src[0], mask[0]); + SET_MASKED(dst[1], src[1], mask[1]); + SET_MASKED(dst[2], src[2], mask[2]); +} + +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; + err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(key->eth.src, eth_key->eth_src); - ether_addr_copy(key->eth.dst, eth_key->eth_dst); + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); return 0; } @@ -311,6 +334,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } } +static void 
mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = MASKED(old[0], addr[0], mask[0]); + masked[1] = MASKED(old[1], addr[1], mask[1]); + masked[2] = MASKED(old[2], addr[2], mask[2]); + masked[3] = MASKED(old[3], addr[3], mask[3]); +} + static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) @@ -322,29 +354,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { - nh->priority = tc >> 4; - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); + /* Bits 21-24 are always unmasked, so this retains their values. */ + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, + u8 mask) { - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; - nh->flow_lbl[2] = fl & 0x000000FF; -} + new_ttl = MASKED(nh->ttl, new_ttl, mask); -static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) -{ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; + __be32 new_addr; int err; err = skb_ensure_writable(skb, skb_network_offset(skb) + @@ -354,36 +386,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, nh = ip_hdr(skb); - if 
(ipv4_key->ipv4_src != nh->saddr) { - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); - key->ipv4.addr.src = ipv4_key->ipv4_src; - } + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (mask->ipv4_src) { + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); - if (ipv4_key->ipv4_dst != nh->daddr) { - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); - key->ipv4.addr.dst = ipv4_key->ipv4_dst; + if (unlikely(new_addr != nh->saddr)) { + set_ip_addr(skb, nh, &nh->saddr, new_addr); + flow_key->ipv4.addr.src = new_addr; + } } + if (mask->ipv4_dst) { + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); - if (ipv4_key->ipv4_tos != nh->tos) { - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); - key->ip.tos = nh->tos; + if (unlikely(new_addr != nh->daddr)) { + set_ip_addr(skb, nh, &nh->daddr, new_addr); + flow_key->ipv4.addr.dst = new_addr; + } } - - if (ipv4_key->ipv4_ttl != nh->ttl) { - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); - key->ip.ttl = ipv4_key->ipv4_ttl; + if (mask->ipv4_tos) { + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); + flow_key->ip.tos = nh->tos; + } + if (mask->ipv4_ttl) { + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); + flow_key->ip.ttl = nh->ttl; } return 0; } -static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv6 *ipv6_key) +static bool is_ipv6_mask_nonzero(const __be32 addr[4]) +{ + return !!(addr[0] | addr[1] | addr[2] | addr[3]); +} + +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; - __be32 *saddr; - __be32 *daddr; err = skb_ensure_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -391,71 +436,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, return err; nh = 
ipv6_hdr(skb); - saddr = (__be32 *)&nh->saddr; - daddr = (__be32 *)&nh->daddr; - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, true); - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(ipv6_key->ipv6_src)); + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { + __be32 *saddr = (__be32 *)&nh->saddr; + __be32 masked[4]; + + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); + + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + true); + memcpy(&flow_key->ipv6.addr.src, masked, + sizeof(flow_key->ipv6.addr.src)); + } } - - if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { unsigned int offset = 0; int flags = IP6_FH_F_SKIP_RH; bool recalc_csum = true; + __be32 *daddr = (__be32 *)&nh->daddr; + __be32 masked[4]; - if (ipv6_ext_hdr(nh->nexthdr)) - recalc_csum = ipv6_find_hdr(skb, &offset, - NEXTHDR_ROUTING, NULL, - &flags) != NEXTHDR_ROUTING; + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(ipv6_key->ipv6_dst)); + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = (ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, + NULL, &flags) + != NEXTHDR_ROUTING); + + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + recalc_csum); + memcpy(&flow_key->ipv6.addr.dst, masked, + sizeof(flow_key->ipv6.addr.dst)); + } + } + if (mask->ipv6_tclass) { + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + flow_key->ip.tos = ipv6_get_dsfield(nh); + } + if 
(mask->ipv6_label) { + set_ipv6_fl(nh, ntohl(key->ipv6_label), + ntohl(mask->ipv6_label)); + flow_key->ipv6.label = + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + } + if (mask->ipv6_hlimit) { + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + flow_key->ip.ttl = nh->hop_limit; } - - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - key->ip.tos = ipv6_get_dsfield(nh); - - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); - - nh->hop_limit = ipv6_key->ipv6_hlimit; - key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, - __be16 new_port, __sum16 *check) + __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb_clear_hash(skb); } -static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) -{ - struct udphdr *uh = udp_hdr(skb); - - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { - set_tp_port(skb, port, new_port, &uh->check); - - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } else { - *port = new_port; - skb_clear_hash(skb); - } -} - -static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -464,23 +515,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) { - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - key->tp.src = udp_port_key->udp_src; + /* Either of the masks is non-zero, so do not bother checking them. 
*/ + src = MASKED(uh->source, key->udp_src, mask->udp_src); + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); + + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { + if (likely(src != uh->source)) { + set_tp_port(skb, &uh->source, src, &uh->check); + flow_key->tp.src = src; + } + if (likely(dst != uh->dest)) { + set_tp_port(skb, &uh->dest, dst, &uh->check); + flow_key->tp.dst = dst; + } + + if (unlikely(!uh->check)) + uh->check = CSUM_MANGLED_0; + } else { + uh->source = src; + uh->dest = dst; + flow_key->tp.src = src; + flow_key->tp.dst = dst; } - if (udp_port_key->udp_dst != uh->dest) { - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); - key->tp.dst = udp_port_key->udp_dst; - } + skb_clear_hash(skb); return 0; } -static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_tcp *key, + const struct ovs_key_tcp *mask) { struct tcphdr *th; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -489,50 +557,51 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) { - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - key->tp.src = tcp_port_key->tcp_src; - } - if (tcp_port_key->tcp_dst != th->dest) { - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); - key->tp.dst = tcp_port_key->tcp_dst; + src = MASKED(th->source, key->tcp_src, mask->tcp_src); + if (likely(src != th->source)) { + set_tp_port(skb, &th->source, src, &th->check); + flow_key->tp.src = src; } + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + if (likely(dst != th->dest)) { + set_tp_port(skb, &th->dest, dst, &th->check); + flow_key->tp.dst = dst; + } + skb_clear_hash(skb); return 0; } -static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_sctp *sctp_port_key) 
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { - struct sctphdr *sh; - int err; unsigned int sctphoff = skb_transport_offset(skb); + struct sctphdr *sh; + __le32 old_correct_csum, new_csum, old_csum; + int err; err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { - __le32 old_correct_csum, new_csum, old_csum; - old_csum = sh->checksum; - old_correct_csum = sctp_compute_cksum(skb, sctphoff); + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); - new_csum = sctp_compute_cksum(skb, sctphoff); + new_csum = sctp_compute_cksum(skb, sctphoff); - /* Carry any checksum errors through. */ - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - skb_clear_hash(skb); - key->tp.src = sctp_port_key->sctp_src; - key->tp.dst = sctp_port_key->sctp_dst; - } + skb_clear_hash(skb); + flow_key->tp.src = sh->source; + flow_key->tp.dst = sh->dest; return 0; } @@ -660,52 +729,78 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + /* Only tunnel set execution is supported without a mask. 
*/ + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { + OVS_CB(skb)->egress_tun_info = nla_data(a); + return 0; + } + + return -EINVAL; + +} + +/* Mask is at the midpoint of the data. */ +#define get_mask(a, type) ((const type)nla_data(a) + 1) + +static int execute_masked_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) { int err = 0; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); - key->phy.priority = skb->priority; + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); - key->phy.skb_mark = skb->mark; + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); + /* Masked data not supported for tunnel. */ + err = -EINVAL; break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, key, nla_data(nested_attr)); + err = set_eth_addr(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ethernet *)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, key, nla_data(nested_attr)); + err = set_ipv4(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv4 *)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, key, nla_data(nested_attr)); + err = set_ipv6(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv6 *)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, key, nla_data(nested_attr)); + err = set_tcp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_tcp *)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, key, nla_data(nested_attr)); + err = set_udp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_udp *)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, key, nla_data(nested_attr)); + err = set_sctp(skb, flow_key, nla_data(a), + get_mask(a, 
struct ovs_key_sctp *)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, key, nla_data(nested_attr)); + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, + __be32 *)); break; } @@ -825,6 +920,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, key, nla_data(a)); break; + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = execute_masked_set_action(skb, key, nla_data(a)); + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 890a88997..95b852a35 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -1705,16 +1705,6 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key, - __be16 eth_type) -{ - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && - (flow_key->tp.src || flow_key->tp.dst)) - return 0; - - return -EINVAL; -} - void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -1817,23 +1807,45 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. 
+ */ +static bool validate_masked(u8 *data, int len) +{ + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type, bool log) + bool *skip_copy, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + size_t key_len; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + key_len = nla_len(ovs_key); + if (masked) + key_len /= 2; + if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type].len != nla_len(ovs_key) && + (ovs_key_lens[key_type].len != key_len && ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; + if (masked && !validate_masked(nla_data(ovs_key), key_len)) + return -EINVAL; + switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; @@ -1848,7 +1860,10 @@ static int validate_set(const struct nlattr *a, if (eth_p_mpls(eth_type)) return -EINVAL; - *set_tun = true; + if (masked) + return -EINVAL; /* Masked tunnel set not supported. */ + + *skip_copy = true; err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; @@ -1858,48 +1873,65 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; + /* Non-writeable fields. 
*/ + if (mask->ipv4_proto || mask->ipv4_frag) + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + /* Non-writeable fields. */ + if (mask->ipv6_proto || mask->ipv6_frag) + return -EINVAL; + /* Invalid bits in the flow label mask? */ + if (ntohl(mask->ipv6_label) & 0xFFF00000) + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) @@ -1907,15 +1939,45 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; default: return -EINVAL; } + /* Convert non-masked non-tunnel set actions to masked set actions. 
*/ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { + int start, len = key_len * 2; + struct nlattr *at; + + *skip_copy = true; + + start = add_nested_action_start(sfa, + OVS_ACTION_ATTR_SET_TO_MASKED, + log); + if (start < 0) + return start; + + at = __add_action(sfa, key_type, NULL, len, log); + if (IS_ERR(at)) + return PTR_ERR(at); + + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ + /* Clear non-writeable bits from otherwise writeable fields. */ + if (key_type == OVS_KEY_ATTR_IPV6) { + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; + + mask->ipv6_label &= htonl(0x000FFFFF); + } + add_nested_action_end(*sfa, start); + } + return 0; } @@ -1977,6 +2039,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) }; @@ -2074,7 +2137,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, log); + &skip_copy, eth_type, false, log); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SET_MASKED: + err = validate_set(a, key, sfa, + &skip_copy, eth_type, true, log); if (err) return err; break; @@ -2104,6 +2174,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. 
*/ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) @@ -2191,6 +2262,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_set_action_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + /* Revert the conversion we did from a non-masked set action to + * masked set action. + */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + return -EMSGSIZE; + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -2206,6 +2292,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = masked_set_action_to_set_action_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index f53bc81c7..6cca50155 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -675,6 +675,9 @@ struct ovs_action_push_tnl { * fields within a header are modifiable, e.g. the IPv4 protocol and fragment * type may not be changed. * + * + * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated + * from the @OVS_ACTION_ATTR_SET. * @OVS_ACTION_ATTR_TUNNEL_PUSH: Push tunnel header described by struct * ovs_action_push_tnl. * @OVS_ACTION_ATTR_TUNNEL_POP: Lookup tunnel port by port-no passed and pop @@ -702,7 +705,14 @@ enum ovs_action_attr { OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. 
*/ #endif - __OVS_ACTION_ATTR_MAX + __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted + * from userspace. */ + +#ifdef __KERNEL__ + OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked + * set action converted from + * OVS_ACTION_ATTR_SET. */ +#endif }; #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) From bd5131ba76156cac4f00d4ace3cdcb31d9135d11 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Fri, 22 May 2015 17:14:19 +0100 Subject: [PATCH 032/146] ovs-numa: Change 'core_id' to unsigned. DPDK lcore_id is unsigned. We need to support big values like LCORE_ID_ANY (=UINT32_MAX). Therefore I am changing the type everywhere in OVS. Signed-off-by: Daniele Di Proietto Signed-off-by: Ethan Jackson Acked-by: Ethan Jackson --- lib/dpif-netdev.c | 27 +++++++++++++++------------ lib/dpif.c | 6 +++--- lib/dpif.h | 16 ++++++++-------- lib/netdev-dpdk.c | 4 ++-- lib/netdev-dpdk.h | 4 ++-- lib/ovs-numa.c | 20 ++++++++++---------- lib/ovs-numa.h | 30 +++++++++++++++--------------- ofproto/ofproto-dpif-upcall.c | 12 ++++++------ 8 files changed, 61 insertions(+), 58 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 22fba7e6d..ace5cb552 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -295,7 +295,7 @@ struct dp_netdev_flow { const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */ /* 'flow_table'. */ const ovs_u128 ufid; /* Unique flow identifier. */ - const int pmd_id; /* The 'core_id' of pmd thread owning this */ + const unsigned pmd_id; /* The 'core_id' of pmd thread owning this */ /* flow. */ /* Number of references. @@ -413,7 +413,7 @@ struct dp_netdev_pmd_thread { pthread_t thread; int index; /* Idx of this pmd thread among pmd*/ /* threads on same numa node. */ - int core_id; /* CPU core id of this pmd thread. */ + unsigned core_id; /* CPU core id of this pmd thread. */ int numa_id; /* numa node id of this pmd thread. */ /* Only a pmd thread can write on its own 'cycles' and 'stats'. 
@@ -458,11 +458,11 @@ static void dp_netdev_disable_upcall(struct dp_netdev *); void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd); static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, int index, - int core_id, int numa_id); + unsigned core_id, int numa_id); static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd); static void dp_netdev_set_nonpmd(struct dp_netdev *dp); static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp, - int core_id); + unsigned core_id); static struct dp_netdev_pmd_thread * dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos); static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp); @@ -581,7 +581,7 @@ pmd_info_show_stats(struct ds *reply, ds_put_format(reply, " numa_id %d", pmd->numa_id); } if (pmd->core_id != OVS_CORE_UNSPEC) { - ds_put_format(reply, " core_id %d", pmd->core_id); + ds_put_format(reply, " core_id %u", pmd->core_id); } ds_put_cstr(reply, ":\n"); @@ -1942,7 +1942,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get) struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; struct dp_netdev_pmd_thread *pmd; - int pmd_id = get->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : get->pmd_id; + unsigned pmd_id = get->pmd_id == PMD_ID_NULL + ? 
NON_PMD_CORE_ID : get->pmd_id; int error = 0; pmd = dp_netdev_get_pmd(dp, pmd_id); @@ -1982,7 +1983,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, memset(&flow->stats, 0, sizeof flow->stats); flow->dead = false; flow->batch = NULL; - *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id; + *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id; *CONST_CAST(struct flow *, &flow->flow) = match->flow; *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid; ovs_refcount_init(&flow->ref_cnt); @@ -2025,7 +2026,8 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) struct dp_netdev_pmd_thread *pmd; struct match match; ovs_u128 ufid; - int pmd_id = put->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : put->pmd_id; + unsigned pmd_id = put->pmd_id == PMD_ID_NULL + ? NON_PMD_CORE_ID : put->pmd_id; int error; error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow); @@ -2120,7 +2122,8 @@ dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del) struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; struct dp_netdev_pmd_thread *pmd; - int pmd_id = del->pmd_id == PMD_ID_NULL ? NON_PMD_CORE_ID : del->pmd_id; + unsigned pmd_id = del->pmd_id == PMD_ID_NULL + ? NON_PMD_CORE_ID : del->pmd_id; int error = 0; pmd = dp_netdev_get_pmd(dp, pmd_id); @@ -2745,7 +2748,7 @@ dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd) * * Caller must unrefs the returned reference. */ static struct dp_netdev_pmd_thread * -dp_netdev_get_pmd(struct dp_netdev *dp, int core_id) +dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id) { struct dp_netdev_pmd_thread *pmd; const struct cmap_node *pnode; @@ -2808,7 +2811,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos) /* Configures the 'pmd' based on the input argument. 
*/ static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, - int index, int core_id, int numa_id) + int index, unsigned core_id, int numa_id) { pmd->dp = dp; pmd->index = index; @@ -2921,7 +2924,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id) can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS); for (i = 0; i < can_have; i++) { struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); - int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); + unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); /* Each thread will distribute all devices rx-queues among diff --git a/lib/dpif.c b/lib/dpif.c index b8f30a503..aa5e64e3f 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -917,7 +917,7 @@ dpif_probe_feature(struct dpif *dpif, const char *name, int dpif_flow_get(struct dpif *dpif, const struct nlattr *key, size_t key_len, const ovs_u128 *ufid, - const int pmd_id, struct ofpbuf *buf, struct dpif_flow *flow) + const unsigned pmd_id, struct ofpbuf *buf, struct dpif_flow *flow) { struct dpif_op *opp; struct dpif_op op; @@ -946,7 +946,7 @@ dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, - const ovs_u128 *ufid, const int pmd_id, + const ovs_u128 *ufid, const unsigned pmd_id, struct dpif_flow_stats *stats) { struct dpif_op *opp; @@ -974,7 +974,7 @@ dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags, int dpif_flow_del(struct dpif *dpif, const struct nlattr *key, size_t key_len, const ovs_u128 *ufid, - const int pmd_id, struct dpif_flow_stats *stats) + const unsigned pmd_id, struct dpif_flow_stats *stats) { struct dpif_op *opp; struct dpif_op op; diff --git a/lib/dpif.h b/lib/dpif.h index 06c652558..ba5d59763 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -525,15 +525,15 @@ int 
dpif_flow_put(struct dpif *, enum dpif_flow_put_flags, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, - const ovs_u128 *ufid, const int pmd_id, + const ovs_u128 *ufid, const unsigned pmd_id, struct dpif_flow_stats *); int dpif_flow_del(struct dpif *, const struct nlattr *key, size_t key_len, - const ovs_u128 *ufid, const int pmd_id, + const ovs_u128 *ufid, const unsigned pmd_id, struct dpif_flow_stats *); int dpif_flow_get(struct dpif *, const struct nlattr *key, size_t key_len, - const ovs_u128 *ufid, const int pmd_id, + const ovs_u128 *ufid, const unsigned pmd_id, struct ofpbuf *, struct dpif_flow *); /* Flow dumping interface @@ -583,7 +583,7 @@ struct dpif_flow { size_t actions_len; /* 'actions' length in bytes. */ ovs_u128 ufid; /* Unique flow identifier. */ bool ufid_present; /* True if 'ufid' was provided by datapath.*/ - int pmd_id; /* Datapath poll mode dirver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ struct dpif_flow_stats stats; /* Flow statistics. */ }; int dpif_flow_dump_next(struct dpif_flow_dump_thread *, @@ -640,7 +640,7 @@ struct dpif_flow_put { const struct nlattr *actions; /* Actions to perform on flow. */ size_t actions_len; /* Length of 'actions' in bytes. */ const ovs_u128 *ufid; /* Optional unique flow identifier. */ - int pmd_id; /* Datapath poll mode driver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ @@ -671,7 +671,7 @@ struct dpif_flow_del { const ovs_u128 *ufid; /* Unique identifier of flow to delete. */ bool terse; /* OK to skip sending/receiving full flow * info? */ - int pmd_id; /* Datapath poll mode driver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ @@ -732,7 +732,7 @@ struct dpif_flow_get { const struct nlattr *key; /* Flow to get. 
*/ size_t key_len; /* Length of 'key' in bytes. */ const ovs_u128 *ufid; /* Unique identifier of flow to get. */ - int pmd_id; /* Datapath poll mode driver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ struct ofpbuf *buffer; /* Storage for output parameters. */ /* Output. */ @@ -807,7 +807,7 @@ struct dpif_upcall { typedef int upcall_callback(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid, - int pmd_id, + unsigned pmd_id, enum dpif_upcall_type type, const struct nlattr *userdata, struct ofpbuf *actions, diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 02a003280..a4868ccc1 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -478,7 +478,7 @@ netdev_dpdk_alloc(void) static void netdev_dpdk_alloc_txq(struct netdev_dpdk *netdev, unsigned int n_txqs) { - int i; + unsigned i; netdev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *netdev->tx_q); /* Each index is considered as a cpu core id, since there should @@ -1993,7 +1993,7 @@ netdev_dpdk_register(void) } int -pmd_thread_setaffinity_cpu(int cpu) +pmd_thread_setaffinity_cpu(unsigned cpu) { cpu_set_t cpuset; int err; diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index d3840f97c..2924f2330 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -28,7 +28,7 @@ struct dp_packet; int dpdk_init(int argc, char **argv); void netdev_dpdk_register(void); void free_dpdk_buf(struct dp_packet *); -int pmd_thread_setaffinity_cpu(int cpu); +int pmd_thread_setaffinity_cpu(unsigned cpu); void thread_set_nonpmd(void); #else @@ -57,7 +57,7 @@ free_dpdk_buf(struct dp_packet *buf OVS_UNUSED) } static inline int -pmd_thread_setaffinity_cpu(int cpu OVS_UNUSED) +pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED) { return 0; } diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 5bed2b5e2..693541fe7 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -70,7 +70,7 @@ struct cpu_core { struct hmap_node hmap_node;/* In the 'all_cpu_cores'. 
*/ struct ovs_list list_node; /* In 'numa_node->cores' list. */ struct numa_node *numa; /* numa node containing the core. */ - int core_id; /* Core id. */ + unsigned core_id; /* Core id. */ bool available; /* If the core can be pinned. */ bool pinned; /* If a thread has been pinned to the core. */ }; @@ -118,7 +118,7 @@ discover_numa_and_core(void) if (!strncmp(subdir->d_name, "cpu", 3) && contain_all_digits(subdir->d_name + 3)){ struct cpu_core *c = xzalloc(sizeof *c); - uint32_t core_id; + unsigned core_id; core_id = strtoul(subdir->d_name + 3, NULL, 10); hmap_insert(&all_cpu_cores, &c->hmap_node, @@ -153,7 +153,7 @@ discover_numa_and_core(void) /* Gets 'struct cpu_core' by 'core_id'. */ static struct cpu_core* -get_core_by_core_id(int core_id) +get_core_by_core_id(unsigned core_id) { struct cpu_core *core = NULL; @@ -201,13 +201,13 @@ ovs_numa_numa_id_is_valid(int numa_id) } bool -ovs_numa_core_id_is_valid(int core_id) +ovs_numa_core_id_is_valid(unsigned core_id) { return found_numa_and_core && core_id < ovs_numa_get_n_cores(); } bool -ovs_numa_core_is_pinned(int core_id) +ovs_numa_core_is_pinned(unsigned core_id) { struct cpu_core *core = get_core_by_core_id(core_id); @@ -237,7 +237,7 @@ ovs_numa_get_n_cores(void) /* Given 'core_id', returns the corresponding numa node id. Returns * OVS_NUMA_UNSPEC if 'core_id' is invalid. */ int -ovs_numa_get_numa_id(int core_id) +ovs_numa_get_numa_id(unsigned core_id) { struct cpu_core *core = get_core_by_core_id(core_id); @@ -288,7 +288,7 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) * False, if the core has already been pinned, or if it is invalid or * not available. */ bool -ovs_numa_try_pin_core_specific(int core_id) +ovs_numa_try_pin_core_specific(unsigned core_id) { struct cpu_core *core = get_core_by_core_id(core_id); @@ -305,7 +305,7 @@ ovs_numa_try_pin_core_specific(int core_id) /* Searches through all cores for an unpinned and available core. 
Returns * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise, * returns OVS_CORE_UNSPEC. */ -int +unsigned ovs_numa_get_unpinned_core_any(void) { struct cpu_core *core; @@ -323,7 +323,7 @@ ovs_numa_get_unpinned_core_any(void) /* Searches through all cores on numa node with 'numa_id' for an * unpinned and available core. Returns the core_id if found and * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */ -int +unsigned ovs_numa_get_unpinned_core_on_numa(int numa_id) { struct numa_node *numa = get_numa_by_numa_id(numa_id); @@ -344,7 +344,7 @@ ovs_numa_get_unpinned_core_on_numa(int numa_id) /* Unpins the core with 'core_id'. */ void -ovs_numa_unpin_core(int core_id) +ovs_numa_unpin_core(unsigned core_id) { struct cpu_core *core = get_core_by_core_id(core_id); diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 35b351bfb..1435d3d97 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -35,25 +35,25 @@ struct ovs_numa_dump { struct ovs_numa_info { struct ovs_list list_node; int numa_id; - int core_id; + unsigned core_id; }; #ifdef __linux__ void ovs_numa_init(void); bool ovs_numa_numa_id_is_valid(int numa_id); -bool ovs_numa_core_id_is_valid(int core_id); -bool ovs_numa_core_is_pinned(int core_id); +bool ovs_numa_core_id_is_valid(unsigned core_id); +bool ovs_numa_core_is_pinned(unsigned core_id); int ovs_numa_get_n_numas(void); void ovs_numa_set_cpu_mask(const char *cmask); int ovs_numa_get_n_cores(void); -int ovs_numa_get_numa_id(int core_id); +int ovs_numa_get_numa_id(unsigned core_id); int ovs_numa_get_n_cores_on_numa(int numa_id); int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id); -bool ovs_numa_try_pin_core_specific(int core_id); -int ovs_numa_get_unpinned_core_any(void); -int ovs_numa_get_unpinned_core_on_numa(int numa_id); -void ovs_numa_unpin_core(int core_id); +bool ovs_numa_try_pin_core_specific(unsigned core_id); +unsigned ovs_numa_get_unpinned_core_any(void); +unsigned ovs_numa_get_unpinned_core_on_numa(int 
numa_id); +void ovs_numa_unpin_core(unsigned core_id); struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id); void ovs_numa_dump_destroy(struct ovs_numa_dump *); @@ -75,13 +75,13 @@ ovs_numa_numa_id_is_valid(int numa_id OVS_UNUSED) } static inline bool -ovs_numa_core_id_is_valid(int core_id OVS_UNUSED) +ovs_numa_core_id_is_valid(unsigned core_id OVS_UNUSED) { return false; } static inline bool -ovs_numa_core_is_pinned(int core_id OVS_UNUSED) +ovs_numa_core_is_pinned(unsigned core_id OVS_UNUSED) { return false; } @@ -105,7 +105,7 @@ ovs_numa_get_n_cores(void) } static inline int -ovs_numa_get_numa_id(int core_id OVS_UNUSED) +ovs_numa_get_numa_id(unsigned core_id OVS_UNUSED) { return OVS_NUMA_UNSPEC; } @@ -123,25 +123,25 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id OVS_UNUSED) } static inline bool -ovs_numa_try_pin_core_specific(int core_id OVS_UNUSED) +ovs_numa_try_pin_core_specific(unsigned core_id OVS_UNUSED) { return false; } -static inline int +static inline unsigned ovs_numa_get_unpinned_core_any(void) { return OVS_CORE_UNSPEC; } -static inline int +static inline unsigned ovs_numa_get_unpinned_core_on_numa(int numa_id OVS_UNUSED) { return OVS_CORE_UNSPEC; } static inline void -ovs_numa_unpin_core(int core_id OVS_UNUSED) +ovs_numa_unpin_core(unsigned core_id OVS_UNUSED) { /* Nothing */ } diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 01bc382b5..4dc169450 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -160,7 +160,7 @@ struct upcall { * may be used with other datapaths. */ const struct flow *flow; /* Parsed representation of the packet. */ const ovs_u128 *ufid; /* Unique identifier for 'flow'. */ - int pmd_id; /* Datapath poll mode driver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ const struct dp_packet *packet; /* Packet associated with this upcall. */ ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. 
*/ @@ -214,7 +214,7 @@ struct udpif_key { ovs_u128 ufid; /* Unique flow identifier. */ bool ufid_present; /* True if 'ufid' is in datapath. */ uint32_t hash; /* Pre-computed hash for 'key'. */ - int pmd_id; /* Datapath poll mode driver id. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ struct ovs_mutex mutex; /* Guards the following. */ struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/ @@ -296,7 +296,7 @@ static enum upcall_type classify_upcall(enum dpif_upcall_type type, static int upcall_receive(struct upcall *, const struct dpif_backer *, const struct dp_packet *packet, enum dpif_upcall_type, const struct nlattr *userdata, const struct flow *, - const ovs_u128 *ufid, const int pmd_id); + const ovs_u128 *ufid, const unsigned pmd_id); static void upcall_uninit(struct upcall *); static upcall_callback upcall_cb; @@ -901,7 +901,7 @@ static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, - const ovs_u128 *ufid, const int pmd_id) + const ovs_u128 *ufid, const unsigned pmd_id) { int error; @@ -1040,7 +1040,7 @@ upcall_uninit(struct upcall *upcall) static int upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid, - int pmd_id, enum dpif_upcall_type type, + unsigned pmd_id, enum dpif_upcall_type type, const struct nlattr *userdata, struct ofpbuf *actions, struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux) { @@ -1313,7 +1313,7 @@ static struct udpif_key * ukey_create__(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, bool ufid_present, const ovs_u128 *ufid, - const int pmd_id, const struct ofpbuf *actions, + const unsigned pmd_id, const struct ofpbuf *actions, uint64_t dump_seq, uint64_t reval_seq, long long int used, const struct recirc_id_node *key_recirc, struct xlate_out *xout) OVS_NO_THREAD_SAFETY_ANALYSIS From 
d5c199ea7ff7fa696be27d35a92276ce02deb54d Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Fri, 22 May 2015 17:14:20 +0100 Subject: [PATCH 033/146] netdev-dpdk: Properly support non pmd threads. We used to reserve DPDK lcore 0 for non pmd operations, making it difficult to use core 0 for packet processing. DPDK 2.0 properly support non EAL threads with lcore LCORE_ID_ANY. Using non EAL threads for non pmd threads, we do not need to reserve any core for non pmd operations Signed-off-by: Daniele Di Proietto Signed-off-by: Ethan Jackson Acked-by: Ethan Jackson --- INSTALL.DPDK.md | 3 --- lib/dpctl.c | 6 ++++++ lib/dpif-netdev.c | 4 +--- lib/netdev-dpdk.c | 10 +--------- lib/netdev-dpdk.h | 16 ++++------------ lib/ovs-thread.c | 2 -- 6 files changed, 12 insertions(+), 29 deletions(-) diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 068d6315a..462ba0e4c 100644 --- a/INSTALL.DPDK.md +++ b/INSTALL.DPDK.md @@ -260,9 +260,6 @@ Using the DPDK with ovs-vswitchd: Note, the pmd threads on a numa node are only created if there is at least one DPDK interface from the numa node that has been added to OVS. - Note, core 0 is always reserved from non-pmd threads and should never be set - in the cpu mask. - To understand where most of the time is spent and whether the caches are effective, these commands can be used: diff --git a/lib/dpctl.c b/lib/dpctl.c index 05c28d177..e95483e9f 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -783,6 +783,12 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) } } + /* Make sure that these values are different. PMD_ID_NULL means that the + * pmd is unspecified (e.g. 
because the datapath doesn't have different + * pmd threads), while NON_PMD_CORE_ID refers to every non pmd threads + * in the userspace datapath */ + BUILD_ASSERT(PMD_ID_NULL != NON_PMD_CORE_ID); + ds_init(&ds); flow_dump = dpif_flow_dump_create(dpif, false); flow_dump_thread = dpif_flow_dump_thread_create(flow_dump); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ace5cb552..76d100335 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -580,7 +580,7 @@ pmd_info_show_stats(struct ds *reply, if (pmd->numa_id != OVS_NUMA_UNSPEC) { ds_put_format(reply, " numa_id %d", pmd->numa_id); } - if (pmd->core_id != OVS_CORE_UNSPEC) { + if (pmd->core_id != OVS_CORE_UNSPEC && pmd->core_id != NON_PMD_CORE_ID) { ds_put_format(reply, " core_id %u", pmd->core_id); } ds_put_cstr(reply, ":\n"); @@ -829,8 +829,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovs_mutex_init_recursive(&dp->non_pmd_mutex); ovsthread_key_create(&dp->per_pmd_key, NULL); - /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */ - ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID); dp_netdev_set_nonpmd(dp); dp->n_dpdk_rxqs = NR_QUEUE; diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index a4868ccc1..3fe5a82c0 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1927,7 +1927,7 @@ dpdk_init(int argc, char **argv) } /* We are called from the main thread here */ - thread_set_nonpmd(); + RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; return result + 1 + base; } @@ -2012,14 +2012,6 @@ pmd_thread_setaffinity_cpu(unsigned cpu) return 0; } -void -thread_set_nonpmd(void) -{ - /* We have to use NON_PMD_CORE_ID to allow non-pmd threads to perform - * certain DPDK operations, like rte_eth_dev_configure(). 
*/ - RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; -} - static bool thread_is_pmd(void) { diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 2924f2330..646d3e21f 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -5,11 +5,6 @@ struct dp_packet; -/* Reserves cpu core 0 for all non-pmd threads. Changing the value of this - * macro will allow pmd thread to be pinned on cpu core 0. This may not be - * ideal since the core may be non-isolated. */ -#define NON_PMD_CORE_ID 0 - #ifdef DPDK_NETDEV #include @@ -25,14 +20,17 @@ struct dp_packet; #include #include +#define NON_PMD_CORE_ID LCORE_ID_ANY + int dpdk_init(int argc, char **argv); void netdev_dpdk_register(void); void free_dpdk_buf(struct dp_packet *); int pmd_thread_setaffinity_cpu(unsigned cpu); -void thread_set_nonpmd(void); #else +#define NON_PMD_CORE_ID UINT32_MAX + #include "util.h" static inline int @@ -62,11 +60,5 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED) return 0; } -static inline void -thread_set_nonpmd(void) -{ - /* Nothing */ -} - #endif /* DPDK_NETDEV */ #endif diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c index 7d38c8001..416109563 100644 --- a/lib/ovs-thread.c +++ b/lib/ovs-thread.c @@ -334,8 +334,6 @@ ovsthread_wrapper(void *aux_) set_subprogram_name("%s%u", aux.name, id); ovsrcu_quiesce_end(); - thread_set_nonpmd(); - return aux.start(aux.arg); } From 45d947c400ded435a1b90fbcb3897c6fa8dd686d Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Fri, 22 May 2015 17:14:21 +0100 Subject: [PATCH 034/146] netdev-dpdk: Use specific spinlock for stats. Right now ethernet and ring devices use a mutex, while vhost devices use a mutex or a spinlock to protect statistics. This commit introduces a single spinlock that's always used for stats updates. 
Signed-off-by: Daniele Di Proietto Signed-off-by: Ethan Jackson Acked-by: Ethan Jackson --- lib/netdev-dpdk.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 3fe5a82c0..975a84250 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -194,6 +194,8 @@ struct netdev_dpdk { int socket_id; int buf_size; struct netdev_stats stats; + /* Protects stats */ + rte_spinlock_t stats_lock; uint8_t hwaddr[ETH_ADDR_LEN]; enum netdev_flags flags; @@ -504,6 +506,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, ovs_mutex_init(&netdev->mutex); ovs_mutex_lock(&netdev->mutex); + rte_spinlock_init(&netdev->stats_lock); + /* If the 'sid' is negative, it means that the kernel fails * to obtain the pci numa info. In that situation, always * use 'SOCKET0'. */ @@ -785,9 +789,9 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid) for (i = nb_tx; i < txq->count; i++) { rte_pktmbuf_free_seg(txq->burst_pkts[i]); } - ovs_mutex_lock(&dev->mutex); + rte_spinlock_lock(&dev->stats_lock); dev->stats.tx_dropped += txq->count-nb_tx; - ovs_mutex_unlock(&dev->mutex); + rte_spinlock_unlock(&dev->stats_lock); } txq->count = 0; @@ -837,7 +841,10 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_, return EAGAIN; } + rte_spinlock_lock(&vhost_dev->stats_lock); vhost_dev->stats.rx_packets += (uint64_t)nb_rx; + rte_spinlock_unlock(&vhost_dev->stats_lock); + *c = (int) nb_rx; return 0; } @@ -880,9 +887,9 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, uint64_t start = 0; if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { - ovs_mutex_lock(&vhost_dev->mutex); + rte_spinlock_lock(&vhost_dev->stats_lock); vhost_dev->stats.tx_dropped+= cnt; - ovs_mutex_unlock(&vhost_dev->mutex); + rte_spinlock_unlock(&vhost_dev->stats_lock); goto out; } @@ -924,8 +931,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, } } while (cnt); + 
rte_spinlock_lock(&vhost_dev->stats_lock); vhost_dev->stats.tx_packets += (total_pkts - cnt); vhost_dev->stats.tx_dropped += cnt; + rte_spinlock_unlock(&vhost_dev->stats_lock); rte_spinlock_unlock(&vhost_dev->txq_lock); out: @@ -1020,9 +1029,9 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts, } if (OVS_UNLIKELY(dropped)) { - ovs_mutex_lock(&dev->mutex); + rte_spinlock_lock(&dev->stats_lock); dev->stats.tx_dropped += dropped; - ovs_mutex_unlock(&dev->mutex); + rte_spinlock_unlock(&dev->stats_lock); } if (dev->type == DPDK_DEV_VHOST) { @@ -1102,9 +1111,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, } if (OVS_UNLIKELY(dropped)) { - ovs_mutex_lock(&dev->mutex); + rte_spinlock_lock(&dev->stats_lock); dev->stats.tx_dropped += dropped; - ovs_mutex_unlock(&dev->mutex); + rte_spinlock_unlock(&dev->stats_lock); } } } @@ -1239,10 +1248,12 @@ netdev_dpdk_vhost_get_stats(const struct netdev *netdev, stats->rx_dropped += UINT64_MAX; stats->tx_bytes += UINT64_MAX; + rte_spinlock_lock(&dev->stats_lock); /* Supported Stats */ stats->rx_packets += dev->stats.rx_packets; stats->tx_packets += dev->stats.tx_packets; stats->tx_dropped += dev->stats.tx_dropped; + rte_spinlock_unlock(&dev->stats_lock); ovs_mutex_unlock(&dev->mutex); return 0; @@ -1269,7 +1280,9 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats) stats->tx_errors = rte_stats.oerrors; stats->multicast = rte_stats.imcasts; + rte_spinlock_lock(&dev->stats_lock); stats->tx_dropped = dev->stats.tx_dropped; + rte_spinlock_unlock(&dev->stats_lock); ovs_mutex_unlock(&dev->mutex); return 0; From a0cb2d66f57ac73a56602be3abb9f69cb8ff95c9 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Fri, 22 May 2015 17:14:22 +0100 Subject: [PATCH 035/146] netdev-dpdk: Adapt the requested number of tx and rx queues. This commit changes the semantics of 'netdev_set_multiq()' to allow OVS DPDK to run on device with limited multi queue support. 
* If a netdev doesn't have the requested number of rxqs it can simply inform the datapath without failing. * If a netdev doesn't have the requested number of txqs it should try to create as many as possible and use locking. Signed-off-by: Daniele Di Proietto Signed-off-by: Ethan Jackson Acked-by: Ethan Jackson --- lib/netdev-dpdk.c | 85 +++++++++++++++++++++++++++++++------------ lib/netdev-provider.h | 11 ++++++ lib/netdev.c | 10 +++++ vswitchd/vswitch.xml | 2 +- 4 files changed, 84 insertions(+), 24 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 975a84250..63243d816 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -159,6 +159,10 @@ struct dpdk_tx_queue { bool flush_tx; /* Set to true to flush queue everytime */ /* pkts are queued. */ int count; + rte_spinlock_t tx_lock; /* Protects the members and the NIC queue + * from concurrent access. It is used only + * if the queue is shared among different + * pmd threads (see 'txq_needs_locking'). */ uint64_t tsc; struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; }; @@ -203,12 +207,22 @@ struct netdev_dpdk { struct rte_eth_link link; int link_reset_cnt; + /* The user might request more txqs than the NIC has. We remap those + * ('up.n_txq') on these ('real_n_txq'). + * If the numbers match, 'txq_needs_locking' is false, otherwise it is + * true and we will take a spinlock on transmission */ + int real_n_txq; + bool txq_needs_locking; + + /* Spinlock for vhost transmission. Other DPDK devices use spinlocks in + * dpdk_tx_queue */ + rte_spinlock_t vhost_tx_lock; + /* virtio-net structure for vhost device */ OVSRCU_TYPE(struct virtio_net *) virtio_dev; /* In dpdk_list. 
*/ struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); - rte_spinlock_t txq_lock; }; struct netdev_rxq_dpdk { @@ -406,6 +420,7 @@ static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) { struct rte_pktmbuf_pool_private *mbp_priv; + struct rte_eth_dev_info info; struct ether_addr eth_addr; int diag; int i; @@ -414,14 +429,19 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } - diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq, + rte_eth_dev_info_get(dev->port_id, &info); + dev->up.n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); + dev->real_n_txq = MIN(info.max_tx_queues, dev->up.n_txq); + + diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->real_n_txq, &port_conf); if (diag) { - VLOG_ERR("eth dev config error %d",diag); + VLOG_ERR("eth dev config error %d. rxq:%d txq:%d", diag, dev->up.n_rxq, + dev->real_n_txq); return -diag; } - for (i = 0; i < dev->up.n_txq; i++) { + for (i = 0; i < dev->real_n_txq; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, NULL); if (diag) { @@ -483,14 +503,20 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *netdev, unsigned int n_txqs) unsigned i; netdev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *netdev->tx_q); - /* Each index is considered as a cpu core id, since there should - * be one tx queue for each cpu core. */ for (i = 0; i < n_txqs; i++) { int numa_id = ovs_numa_get_numa_id(i); - /* If the corresponding core is not on the same numa node - * as 'netdev', flags the 'flush_tx'. */ - netdev->tx_q[i].flush_tx = netdev->socket_id == numa_id; + if (!netdev->txq_needs_locking) { + /* Each index is considered as a cpu core id, since there should + * be one tx queue for each cpu core. If the corresponding core + * is not on the same numa node as 'netdev', flags the + * 'flush_tx'. */ + netdev->tx_q[i].flush_tx = netdev->socket_id == numa_id; + } else { + /* Queues are shared among CPUs. 
Always flush */ + netdev->tx_q[i].flush_tx = true; + } + rte_spinlock_init(&netdev->tx_q[i].tx_lock); } } @@ -523,7 +549,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, netdev->flags = 0; netdev->mtu = ETHER_MTU; netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); - rte_spinlock_init(&netdev->txq_lock); netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); if (!netdev->dpdk_mp) { @@ -533,6 +558,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, netdev_->n_txq = NR_QUEUE; netdev_->n_rxq = NR_QUEUE; + netdev->real_n_txq = NR_QUEUE; if (type == DPDK_DEV_ETH) { netdev_dpdk_alloc_txq(netdev, NR_QUEUE); @@ -570,6 +596,7 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[], static int netdev_dpdk_vhost_construct(struct netdev *netdev_) { + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err; if (rte_eal_init_ret) { @@ -580,6 +607,8 @@ netdev_dpdk_vhost_construct(struct netdev *netdev_) err = netdev_dpdk_init(netdev_, -1, DPDK_DEV_VHOST); ovs_mutex_unlock(&dpdk_mutex); + rte_spinlock_init(&netdev->vhost_tx_lock); + return err; } @@ -654,7 +683,8 @@ netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) ovs_mutex_lock(&dev->mutex); smap_add_format(args, "configured_rx_queues", "%d", netdev_->n_rxq); - smap_add_format(args, "configured_tx_queues", "%d", netdev_->n_txq); + smap_add_format(args, "requested_tx_queues", "%d", netdev_->n_txq); + smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); ovs_mutex_unlock(&dev->mutex); return 0; @@ -691,8 +721,10 @@ netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq, netdev->up.n_rxq = n_rxq; rte_free(netdev->tx_q); - netdev_dpdk_alloc_txq(netdev, n_txq); err = dpdk_eth_dev_init(netdev); + netdev_dpdk_alloc_txq(netdev, netdev->real_n_txq); + + netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; ovs_mutex_unlock(&netdev->mutex); ovs_mutex_unlock(&dpdk_mutex); @@ -702,7 +734,7 @@ 
netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq, static int netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq, - unsigned int n_rxq) + unsigned int n_rxq) { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err = 0; @@ -715,7 +747,8 @@ netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq, ovs_mutex_lock(&netdev->mutex); netdev->up.n_txq = n_txq; - netdev->up.n_rxq = n_rxq; + netdev->real_n_txq = 1; + netdev->up.n_rxq = 1; ovs_mutex_unlock(&netdev->mutex); ovs_mutex_unlock(&dpdk_mutex); @@ -894,7 +927,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, } /* There is vHost TX single queue, So we need to lock it for TX. */ - rte_spinlock_lock(&vhost_dev->txq_lock); + rte_spinlock_lock(&vhost_dev->vhost_tx_lock); do { unsigned int tx_pkts; @@ -930,12 +963,12 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts, } } } while (cnt); + rte_spinlock_unlock(&vhost_dev->vhost_tx_lock); rte_spinlock_lock(&vhost_dev->stats_lock); vhost_dev->stats.tx_packets += (total_pkts - cnt); vhost_dev->stats.tx_dropped += cnt; rte_spinlock_unlock(&vhost_dev->stats_lock); - rte_spinlock_unlock(&vhost_dev->txq_lock); out: if (may_steal) { @@ -1071,6 +1104,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, { int i; + if (OVS_UNLIKELY(dev->txq_needs_locking)) { + qid = qid % dev->real_n_txq; + rte_spinlock_lock(&dev->tx_q[qid].tx_lock); + } + if (OVS_UNLIKELY(!may_steal || pkts[0]->source != DPBUF_DPDK)) { struct netdev *netdev = &dev->up; @@ -1116,6 +1154,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, rte_spinlock_unlock(&dev->stats_lock); } } + + if (OVS_UNLIKELY(dev->txq_needs_locking)) { + rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); + } } static int @@ -1770,10 +1812,10 @@ dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id) OVS_REQUIRES(dp } static int -netdev_dpdk_ring_send(struct netdev *netdev, int qid OVS_UNUSED, 
+netdev_dpdk_ring_send(struct netdev *netdev_, int qid, struct dp_packet **pkts, int cnt, bool may_steal) { - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); unsigned i; /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that the @@ -1784,10 +1826,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid OVS_UNUSED, dp_packet_set_rss_hash(pkts[i], 0); } - /* DPDK Rings have a single TX queue, Therefore needs locking. */ - rte_spinlock_lock(&dev->txq_lock); - netdev_dpdk_send__(dev, 0, pkts, cnt, may_steal); - rte_spinlock_unlock(&dev->txq_lock); + netdev_dpdk_send__(netdev, qid, pkts, cnt, may_steal); return 0; } @@ -1965,7 +2004,7 @@ static const struct netdev_class dpdk_ring_class = NULL, netdev_dpdk_ring_construct, netdev_dpdk_destruct, - NULL, + netdev_dpdk_set_multiq, netdev_dpdk_ring_send, netdev_dpdk_get_carrier, netdev_dpdk_get_stats, diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 734601d84..eae1e6462 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -278,6 +278,17 @@ struct netdev_class { /* Configures the number of tx queues and rx queues of 'netdev'. * Return 0 if successful, otherwise a positive errno value. * + * 'n_rxq' specifies the maximum number of receive queues to create. + * The netdev provider might choose to create less (e.g. if the hardware + * supports only a smaller number). The actual number of queues created + * is stored in the 'netdev->n_rxq' field. + * + * 'n_txq' specifies the exact number of transmission queues to create. + * The caller will call netdev_send() concurrently from 'n_txq' different + * threads (with different qid). The netdev provider is responsible for + * making sure that these concurrent calls do not create a race condition + * by using multiple hw queues or locking. + * * On error, the tx queue and rx queue configuration is indeterminant. 
* Caller should make decision on whether to restore the previous or * the default configuration. Also, caller must make sure there is no diff --git a/lib/netdev.c b/lib/netdev.c index 45f7f29a0..03a754979 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -675,6 +675,16 @@ netdev_rxq_drain(struct netdev_rxq *rx) /* Configures the number of tx queues and rx queues of 'netdev'. * Return 0 if successful, otherwise a positive errno value. * + * 'n_rxq' specifies the maximum number of receive queues to create. + * The netdev provider might choose to create less (e.g. if the hardware + * supports only a smaller number). The caller can check how many have been + * actually created by calling 'netdev_n_rxq()' + * + * 'n_txq' specifies the exact number of transmission queues to create. + * If this function returns successfully, the caller can make 'n_txq' + * concurrent calls to netdev_send() (each one with a different 'qid' in the + * range [0..'n_txq'-1]). + * * On error, the tx queue and rx queue configuration is indeterminant. * Caller should make decision on whether to restore the previous or * the default configuration. Also, caller must make sure there is no diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 79b5606c7..8a604744a 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -155,7 +155,7 @@

- Specifies the number of rx queues to be created for each dpdk + Specifies the maximum number of rx queues to be created for each dpdk interface. If not specified or specified to 0, one rx queue will be created for each dpdk interface by default.

From f046804891635d497e5b91a3b90340951132c1d9 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Sun, 17 May 2015 06:06:01 -0700 Subject: [PATCH 036/146] sparse: Fix sparse when compiling DPDK. Sparse doesn't like several of the DPDK header files. This patch works around it so we can get analysis when compiling DPDK. Signed-off-by: Ethan Jackson Acked-by: Daniele Di Proietto --- include/sparse/automake.mk | 5 +++++ include/sparse/bmi2intrin.h | 25 +++++++++++++++++++++++++ include/sparse/emmintrin.h | 27 +++++++++++++++++++++++++++ include/sparse/rte_atomic.h | 25 +++++++++++++++++++++++++ include/sparse/rte_lcore.h | 23 +++++++++++++++++++++++ include/sparse/rte_vect.h | 23 +++++++++++++++++++++++ 6 files changed, 128 insertions(+) create mode 100644 include/sparse/bmi2intrin.h create mode 100644 include/sparse/emmintrin.h create mode 100644 include/sparse/rte_atomic.h create mode 100644 include/sparse/rte_lcore.h create mode 100644 include/sparse/rte_vect.h diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk index 572c7c2c7..0456ee67d 100644 --- a/include/sparse/automake.mk +++ b/include/sparse/automake.mk @@ -1,10 +1,15 @@ noinst_HEADERS += \ include/sparse/arpa/inet.h \ include/sparse/assert.h \ + include/sparse/bmi2intrin.h \ + include/sparse/emmintrin.h \ include/sparse/math.h \ include/sparse/netinet/in.h \ include/sparse/netinet/ip6.h \ include/sparse/netpacket/packet.h \ include/sparse/pthread.h \ + include/sparse/rte_atomic.h \ + include/sparse/rte_lcore.h \ + include/sparse/rte_vect.h \ include/sparse/sys/socket.h \ include/sparse/sys/wait.h diff --git a/include/sparse/bmi2intrin.h b/include/sparse/bmi2intrin.h new file mode 100644 index 000000000..3ee37bb72 --- /dev/null +++ b/include/sparse/bmi2intrin.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +/* Sparse doesn't know the __int128 type used by GCC 4.9 *intrin.h headers. + * We cannot use a typedef because the type is used with a qualifier + * ('unsigned __int128') */ +#define __int128 int +#include_next +#undef __int128 diff --git a/include/sparse/emmintrin.h b/include/sparse/emmintrin.h new file mode 100644 index 000000000..3810f55a2 --- /dev/null +++ b/include/sparse/emmintrin.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." 
+#endif + +/* GCC 4.8 *intrin.h headers do not work if these are not defined */ +#define __SSE2__ +#define __SSE__ +#define __MMX__ +#include_next +#undef __MMX__ +#undef __SSE__ +#undef __SSE2__ diff --git a/include/sparse/rte_atomic.h b/include/sparse/rte_atomic.h new file mode 100644 index 000000000..ae49fe5c0 --- /dev/null +++ b/include/sparse/rte_atomic.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +/* Fix sparse technicality about types in one of the function calls by just + * ignoring it. */ +#define __sync_add_and_fetch(a, b) (0) + +/* Get actual definitions for us to annotate and build on. */ +#include_next diff --git a/include/sparse/rte_lcore.h b/include/sparse/rte_lcore.h new file mode 100644 index 000000000..584bfe12c --- /dev/null +++ b/include/sparse/rte_lcore.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +typedef int rte_cpuset_t; + +/* Get actual definitions for us to annotate and build on. */ +#include_next diff --git a/include/sparse/rte_vect.h b/include/sparse/rte_vect.h new file mode 100644 index 000000000..6f6625b7d --- /dev/null +++ b/include/sparse/rte_vect.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +typedef int __m128i; + +/* Get actual definitions for us to annotate and build on. */ +#include_next From 561341365a5274791b00c8e5860838d845275a3f Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 19 May 2015 14:18:58 -0700 Subject: [PATCH 037/146] extract-ofp-fields: Fix most pep8 style issues. 
Signed-off-by: Joe Stringer Acked-by: YAMAMOTO Takashi --- build-aux/extract-ofp-fields | 77 +++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index b15b01d52..b0e905cf5 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -21,32 +21,32 @@ TYPES = {"u8": 1, "be64": 8, "IPv6": 16} -FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), - "hexadecimal": ("MFS_HEXADECIMAL", 1, 8), - "Ethernet": ("MFS_ETHERNET", 6, 6), - "IPv4": ("MFS_IPV4", 4, 4), - "IPv6": ("MFS_IPV6", 16,16), - "OpenFlow 1.0 port": ("MFS_OFP_PORT", 2, 2), - "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4, 4), - "frag": ("MFS_FRAG", 1, 1), - "tunnel flags": ("MFS_TNL_FLAGS", 2, 2), - "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} +FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), + "hexadecimal": ("MFS_HEXADECIMAL", 1, 8), + "Ethernet": ("MFS_ETHERNET", 6, 6), + "IPv4": ("MFS_IPV4", 4, 4), + "IPv6": ("MFS_IPV6", 16, 16), + "OpenFlow 1.0 port": ("MFS_OFP_PORT", 2, 2), + "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4, 4), + "frag": ("MFS_FRAG", 1, 1), + "tunnel flags": ("MFS_TNL_FLAGS", 2, 2), + "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} PREREQS = {"none": "MFP_NONE", - "ARP": "MFP_ARP", - "VLAN VID": "MFP_VLAN_VID", - "IPv4": "MFP_IPV4", - "IPv6": "MFP_IPV6", - "IPv4/IPv6": "MFP_IP_ANY", - "MPLS": "MFP_MPLS", - "TCP": "MFP_TCP", - "UDP": "MFP_UDP", - "SCTP": "MFP_SCTP", - "ICMPv4": "MFP_ICMPV4", - "ICMPv6": "MFP_ICMPV6", - "ND": "MFP_ND", - "ND solicit": "MFP_ND_SOLICIT", - "ND advert": "MFP_ND_ADVERT"} + "ARP": "MFP_ARP", + "VLAN VID": "MFP_VLAN_VID", + "IPv4": "MFP_IPV4", + "IPv6": "MFP_IPV6", + "IPv4/IPv6": "MFP_IP_ANY", + "MPLS": "MFP_MPLS", + "TCP": "MFP_TCP", + "UDP": "MFP_UDP", + "SCTP": "MFP_SCTP", + "ICMPv4": "MFP_ICMPV4", + "ICMPv6": "MFP_ICMPV6", + "ND": "MFP_ND", + "ND solicit": "MFP_ND_SOLICIT", + "ND advert": "MFP_ND_ADVERT"} # Maps a name prefix into an (experimenter ID, class) 
pair, so: # @@ -67,6 +67,8 @@ OXM_CLASSES = {"NXM_OF_": (0, 0x0000), # used only to test support for experimenter OXM, since there # are barely any real uses of experimenter OXM in the wild. "NXOXM_ET_": (0x00002320, 0xffff)} + + def oxm_name_to_class(name): prefix = '' class_ = None @@ -76,6 +78,7 @@ def oxm_name_to_class(name): class_ = c return class_ + def decode_version_range(range): if range in VERSION: return (VERSION[range], VERSION[range]) @@ -85,6 +88,7 @@ def decode_version_range(range): a, b = re.match(r'^([^-]+)-([^-]+)$', range).groups() return (VERSION[a], VERSION[b]) + def get_line(): global line global line_number @@ -93,16 +97,21 @@ def get_line(): if line == "": fatal("unexpected end of input") + n_errors = 0 + + def error(msg): global n_errors sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg)) n_errors += 1 + def fatal(msg): error(msg) sys.exit(1) + def usage(): argv0 = os.path.basename(sys.argv[0]) print '''\ @@ -115,6 +124,7 @@ file to #include.\ ''' % {"argv0": argv0} sys.exit(0) + def make_sizeof(s): m = re.match(r'(.*) up to (.*)', s) if m: @@ -123,17 +133,19 @@ def make_sizeof(s): else: return "sizeof(%s)" % s + def parse_oxms(s, prefix, n_bytes): if s == 'none': return () return tuple(parse_oxm(s2.strip(), prefix, n_bytes) for s2 in s.split(',')) + def parse_oxm(s, prefix, n_bytes): m = re.match('([A-Z0-9_]+)\(([0-9]+)\) since(?: OF(1\.[0-9]+) and)? 
v([12]\.[0-9]+)$', s) if not m: fatal("%s: syntax error parsing %s" % (s, prefix)) - + name, oxm_type, of_version, ovs_version = m.groups() class_ = oxm_name_to_class(name) @@ -161,6 +173,7 @@ def parse_oxm(s, prefix, n_bytes): return (header, name, of_version_nr, ovs_version) + def parse_field(mff, comment): f = {'mff': mff} @@ -246,7 +259,7 @@ def parse_field(mff, comment): f['OF1.0'] = d['OF1.0'] if not d['OF1.0'] in (None, 'exact match', 'CIDR mask'): fatal("%s: unknown OF1.0 match type %s" % (mff, d['OF1.0'])) - + f['OF1.1'] = d['OF1.1'] if not d['OF1.1'] in (None, 'exact match', 'bitwise mask'): fatal("%s: unknown OF1.1 match type %s" % (mff, d['OF1.1'])) @@ -258,6 +271,7 @@ def parse_field(mff, comment): return f + def protocols_to_c(protocols): if protocols == set(['of10', 'of11', 'oxm']): return 'OFPUTIL_P_ANY' @@ -268,7 +282,8 @@ def protocols_to_c(protocols): elif protocols == set([]): return 'OFPUTIL_P_NONE' else: - assert False + assert False + def make_meta_flow(fields): output = [] @@ -331,7 +346,7 @@ def make_meta_flow(fields): output += [" %s," % protocols_to_c(protocols)] output += [" %s," % protocols_to_c(cidr_protocols)] output += [" %s," % protocols_to_c(bitwise_protocols)] - + if f['prefix']: output += [" FLOW_U32OFS(%s)," % f['prefix']] else: @@ -340,9 +355,10 @@ def make_meta_flow(fields): output += ["},"] return output + def make_nx_match(fields): output = [] - print "static struct nxm_field_index all_nxm_fields[] = {"; + print "static struct nxm_field_index all_nxm_fields[] = {" for f in fields: # Sort by OpenFlow version number (nx-match.c depends on this). 
for oxm in sorted(f['OXM'], key=lambda x: x[2]): @@ -351,6 +367,7 @@ def make_nx_match(fields): print "};" return output + def extract_ofp_fields(mode): global line @@ -490,5 +507,3 @@ if __name__ == '__main__': else: sys.stderr.write("invalid arguments; use --help for help\n") sys.exit(1) - - From 1421f6828a513eea9c3fbdc08b6c4fb4a0ea518e Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 19 May 2015 14:20:31 -0700 Subject: [PATCH 038/146] extract-ofp-fields: Port to python3. Mostly "print foo" -> "print(foo)" and "iteritems() -> items()". The latter may be less efficient in python2, but we're not dealing with massive numbers of items here so it shouldn't noticeably slow the build. Signed-off-by: Joe Stringer Acked-by: YAMAMOTO Takashi --- build-aux/extract-ofp-fields | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index b0e905cf5..f05487e1d 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -72,7 +72,7 @@ OXM_CLASSES = {"NXM_OF_": (0, 0x0000), def oxm_name_to_class(name): prefix = '' class_ = None - for p, c in OXM_CLASSES.iteritems(): + for p, c in OXM_CLASSES.items(): if name.startswith(p) and len(p) > len(prefix): prefix = p class_ = c @@ -114,14 +114,14 @@ def fatal(msg): def usage(): argv0 = os.path.basename(sys.argv[0]) - print '''\ + print('''\ %(argv0)s, for extracting OpenFlow field properties from meta-flow.h usage: %(argv0)s INPUT [--meta-flow | --nx-match] where INPUT points to lib/meta-flow.h in the source directory.
Depending on the option given, the output written to stdout is intended to be saved either as lib/meta-flow.inc or lib/nx-match.inc for the respective C file to #include.\ -''' % {"argv0": argv0} +''' % {"argv0": argv0}) sys.exit(0) @@ -210,7 +210,7 @@ def parse_field(mff, comment): elif d[key] is not None: fatal("%s: duplicate key" % key) d[key] = value - for key, value in d.iteritems(): + for key, value in d.items(): if not value and key not in ("OF1.0", "OF1.1", "Prefix lookup member", "Notes"): fatal("%s: missing %s" % (mff, key)) @@ -358,13 +358,13 @@ def make_meta_flow(fields): def make_nx_match(fields): output = [] - print "static struct nxm_field_index all_nxm_fields[] = {" + print("static struct nxm_field_index all_nxm_fields[] = {") for f in fields: # Sort by OpenFlow version number (nx-match.c depends on this). for oxm in sorted(f['OXM'], key=lambda x: x[2]): - print """{ .nf = { %s, %d, "%s", %s } },""" % ( - oxm[0], oxm[2], oxm[1], f['mff']) - print "};" + print("""{ .nf = { %s, %d, "%s", %s } },""" % ( + oxm[0], oxm[2], oxm[1], f['mff'])) + print("};") return output @@ -473,9 +473,9 @@ def extract_ofp_fields(mode): if n_errors: sys.exit(1) - print """\ + print("""\ /* Generated automatically; do not modify! "-*- buffer-read-only: t -*- */ -""" +""") if mode == '--meta-flow': output = make_meta_flow(fields) @@ -503,7 +503,7 @@ if __name__ == '__main__': line_number = 0 for oline in extract_ofp_fields(sys.argv[2]): - print oline + print(oline) else: sys.stderr.write("invalid arguments; use --help for help\n") sys.exit(1) From fbe794aace80a840df26d8ccc5545ce0ba7a099b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 22 May 2015 10:24:34 -0700 Subject: [PATCH 039/146] dpctl: Don't print UFID if not present. With verbose dpctl, if userspace runs against an older kernel, every entry will have "ufid:" at the beginning. This is unnecessary and introduces an additional format for scripts to parse. Drop it. 
Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/dpctl.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/dpctl.c b/lib/dpctl.c index e95483e9f..a59745532 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -709,13 +709,9 @@ static void format_dpif_flow(struct ds *ds, const struct dpif_flow *f, struct hmap *ports, struct dpctl_params *dpctl_p) { - if (dpctl_p->verbosity) { - if (f->ufid_present) { - odp_format_ufid(&f->ufid, ds); - ds_put_cstr(ds, ", "); - } else { - ds_put_cstr(ds, "ufid:, "); - } + if (dpctl_p->verbosity && f->ufid_present) { + odp_format_ufid(&f->ufid, ds); + ds_put_cstr(ds, ", "); } odp_flow_format(f->key, f->key_len, f->mask, f->mask_len, ports, ds, dpctl_p->verbosity); From fd8232b32790de08bdec24e523720af90c383dad Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 20 May 2015 13:14:29 -0700 Subject: [PATCH 040/146] Revert "ovs-ofctl: Always prints recirc_id in decimal" As there is the potential for this field to be maskable in future, and the dpctl "-m" output prints a mask for it, return it to hexadecimal. The next patch will make this consistent to the recirc action by making the action print the recirc_id in hex as well. 
Signed-off-by: Joe Stringer Signed-off-by: Andy Zhou --- lib/match.c | 24 +++++------------------- tests/ofproto-dpif.at | 4 ++-- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/lib/match.c b/lib/match.c index b1550842a..7d0b4095f 100644 --- a/lib/match.c +++ b/lib/match.c @@ -843,30 +843,16 @@ format_be32_masked(struct ds *s, const char *name, } static void -format_uint32_masked__(struct ds *s, const char *name, - uint32_t value, uint32_t mask, const char *format) +format_uint32_masked(struct ds *s, const char *name, + uint32_t value, uint32_t mask) { if (mask) { - ds_put_format(s, format, name, value); + ds_put_format(s, "%s=%#"PRIx32, name, value); if (mask != UINT32_MAX) { ds_put_format(s, "/%#"PRIx32, mask); } ds_put_char(s, ','); } - -} -static void -format_uint32_masked(struct ds *s, const char *name, - uint32_t value, uint32_t mask) -{ - format_uint32_masked__(s, name, value, mask, "%s=%#"PRIx32); -} - -static void -format_decimal_uint32_masked(struct ds *s, const char *name, - uint32_t value, uint32_t mask) -{ - format_uint32_masked__(s, name, value, mask, "%s=%"PRIu32); } static void @@ -935,8 +921,8 @@ match_format(const struct match *match, struct ds *s, int priority) format_uint32_masked(s, "pkt_mark", f->pkt_mark, wc->masks.pkt_mark); if (wc->masks.recirc_id) { - format_decimal_uint32_masked(s, "recirc_id", f->recirc_id, - wc->masks.recirc_id); + format_uint32_masked(s, "recirc_id", f->recirc_id, + wc->masks.recirc_id); } if (wc->masks.dp_hash) { diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 3361dc2d8..139dfdd6b 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -186,7 +186,7 @@ table=0 priority=2 in_port=5 dl_vlan=1 actions=drop AT_CHECK([ovs-ofctl add-flows br-int flows.txt]) # Sends a packet to trigger recirculation. -# Should generate recirc_id(2),dp_hash(0xc1261ba2/0xff). +# Should generate recirc_id(0x2),dp_hash(0xc1261ba2/0xff). 
AT_CHECK([ovs-appctl netdev-dummy/receive p5 "in_port(5),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1)"]) # Collects flow stats. @@ -195,7 +195,7 @@ AT_CHECK([ovs-appctl revalidator/purge], [0]) # Checks the flow stats in br1, should only be one flow with non-zero # 'n_packets' from internal table. AT_CHECK([ovs-appctl bridge/dump-flows br1 | ofctl_strip | grep -- "n_packets" | grep -- "table_id" | sed -e 's/dp_hash=0x[[0-9a-f]][[0-9a-f]]*/dp_hash=0x0/' -e 's/output:[[0-9]][[0-9]]*/output/'], [0], [dnl -table_id=254, n_packets=1, n_bytes=64, priority=20,recirc_id=2,dp_hash=0x0/0xff,actions=output +table_id=254, n_packets=1, n_bytes=64, priority=20,recirc_id=0x2,dp_hash=0x0/0xff,actions=output ]) # Checks the flow stats in br-int, should be only one match. From 8f19f0a70352dfa555cffa4f7f34dab22870dc49 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 20 May 2015 13:46:01 -0700 Subject: [PATCH 041/146] odp-util: always output recirc_id in hex The match is in hex, this makes it more consistent. 
Signed-off-by: Joe Stringer Signed-off-by: Andy Zhou --- lib/odp-util.c | 2 +- tests/mpls-xlate.at | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 962b84b2c..4845d28ef 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -495,7 +495,7 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key, static void format_odp_recirc_action(struct ds *ds, uint32_t recirc_id) { - ds_put_format(ds, "recirc(%"PRIu32")", recirc_id); + ds_put_format(ds, "recirc(%#"PRIx32")", recirc_id); } static void diff --git a/tests/mpls-xlate.at b/tests/mpls-xlate.at index 571b8ce45..8f286c3a5 100644 --- a/tests/mpls-xlate.at +++ b/tests/mpls-xlate.at @@ -47,12 +47,12 @@ AT_CHECK([tail -1 stdout], [0], dnl Double MPLS pop AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=60,tc=0/0,ttl=64,bos=0)' -generate], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: pop_mpls(eth_type=0x8847),recirc(1) + [Datapath actions: pop_mpls(eth_type=0x8847),recirc(0x1) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(1),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=50,tc=0/0,ttl=64,bos=0)' -generate], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: pop_mpls(eth_type=0x800),recirc(2) + [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x2) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)' -generate], [0], [stdout]) From 8217bc65d26c3f758bfbda7b0bc070eb1cd03a35 Mon Sep 17 00:00:00 2001 From: Nithin Raju Date: Mon, 25 May 2015 23:20:23 -0700 Subject: [PATCH 042/146] datapath-windows: Make PacketIO.c compilable with WDK8. There's some code in PacketIO.c that is supported in WDK 8.1 only.
The variable declarations for that code must also be WDK 8.1 only. Signed-off-by: Nithin Raju Acked-by: Alin Gabriel Serdean Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/PacketIO.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 902e9aa4f..ed629fdde 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -216,8 +216,10 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, LIST_ENTRY missedPackets; UINT32 num = 0; OvsCompletionList completionList; +#if (NDIS_SUPPORT_NDIS640) PNET_BUFFER_LIST nativeForwardedNbls = NULL; PNET_BUFFER_LIST *nextNativeForwardedNbl = &nativeForwardedNbls; +#endif dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)? NDIS_RWL_AT_DISPATCH_LEVEL : 0; @@ -254,7 +256,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, sourcePort == switchContext->virtualExternalPortId); continue; } -#endif +#endif /* NDIS_SUPPORT_NDIS640 */ /* Ethernet Header is a guaranteed safe access. */ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); @@ -353,11 +355,13 @@ dropit: } } +#if (NDIS_SUPPORT_NDIS640) if (nativeForwardedNbls) { /* This is NVGRE encapsulated traffic and is forwarded to NDIS * in order to be handled by the HNV module. */ OvsSendNBLIngress(switchContext, nativeForwardedNbls, SendFlags); } +#endif /* NDIS_SUPPORT_NDIS640 */ /* Queue the missed packets. */ OvsQueuePackets(&missedPackets, num); From b519432205c36bda5c7331f77a49eaaa919967ad Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Tue, 26 May 2015 16:49:49 -0700 Subject: [PATCH 043/146] debian: install openvswitch kernel module under "updates" directory This patch fixes a bug where "modprobe openvswitch" command on Ubuntu distribution would have sometimes tried to load OVS kernel module that shipped together with Linux Kernel, even though one had also installed OVS datapath debian package created with module-assistant. 
Because of this issue force-reload-kmod command occasionally malfunctioned and failed to load the right kernel module. This bug happened *occasionally* because the default Ubuntu depmod configuration in /etc/depmod.d/ubuntu.conf is set to look for kernel modules first in "updates" directory, then in "ubuntu" directory and then in other directories. If there were two openvswitch.ko modules in "other directories", then modprobe would have loaded kernel module that was nondeterministically listed first by file system. Signed-off-by: Ansis Atteka Acked-by: Ben Pfaff --- debian/rules.modules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/rules.modules b/debian/rules.modules index 2eb944055..657168394 100755 --- a/debian/rules.modules +++ b/debian/rules.modules @@ -17,7 +17,7 @@ kdist_clean: kdist_config: prep-deb-files .PHONY: binary-modules -binary-modules: DSTDIR = $(CURDIR)/debian/$(PKGNAME)/lib/modules/$(KVERS)/kernel +binary-modules: DSTDIR = $(CURDIR)/debian/$(PKGNAME)/lib/modules/$(KVERS)/kernel/updates binary-modules: prep-deb-files dh_testdir dh_testroot From 694ebbc85eb164a83926ace56a4d8b90424a5447 Mon Sep 17 00:00:00 2001 From: Alin Serdean Date: Tue, 19 May 2015 17:21:25 +0000 Subject: [PATCH 044/146] netdev-windows: Add ARP lookup and next hop functionality. This patch implements two functionalities needed for an active manager: 1. ARP lookup 2. Next hop The first uses the Windows GetIpNetTable() function: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365956%28v=vs.85%29.aspx The second one uses GetAdaptersAddresses() function: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365915%28v=vs.85%29.aspx Both API's are found in the Iphlpapi library. We need to add this library when compiling. Documentation and appveyor config has been updated to match the use of the new library. Tested using opendaylight. 
Signed-off-by: Alin Gabriel Serdean Reported-by: Alin Gabriel Serdean Reported-at: https://github.com/openvswitch/ovs-issues/issues/63 Acked-by: Eitan Eliahu Signed-off-by: Ben Pfaff --- INSTALL.Windows.md | 25 +++++----- lib/netdev-windows.c | 114 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 12 deletions(-) diff --git a/INSTALL.Windows.md b/INSTALL.Windows.md index 78af0a173..0ec0af0b6 100644 --- a/INSTALL.Windows.md +++ b/INSTALL.Windows.md @@ -62,9 +62,10 @@ or from a distribution tar ball. the right compiler, linker, libraries, Open vSwitch component installation directories, etc. For example, - % ./configure CC=./build-aux/cccl LD="`which link`" LIBS="-lws2_32" \ - --prefix="C:/openvswitch/usr" --localstatedir="C:/openvswitch/var" \ - --sysconfdir="C:/openvswitch/etc" --with-pthread="C:/pthread" + % ./configure CC=./build-aux/cccl LD="`which link`" \ + LIBS="-lws2_32 -liphlpapi" --prefix="C:/openvswitch/usr" \ + --localstatedir="C:/openvswitch/var" --sysconfdir="C:/openvswitch/etc" \ + --with-pthread="C:/pthread" By default, the above enables compiler optimization for fast code. For default compiler optimization, pass the "--with-debug" configure @@ -114,10 +115,10 @@ Note down the directory where OpenSSL is installed (e.g.: C:/OpenSSL-Win32). * While configuring the package, specify the OpenSSL directory path. For example, - % ./configure CC=./build-aux/cccl LD="`which link`" LIBS="-lws2_32" \ - --prefix="C:/openvswitch/usr" --localstatedir="C:/openvswitch/var" \ - --sysconfdir="C:/openvswitch/etc" --with-pthread="C:/pthread" \ - --enable-ssl --with-openssl="C:/OpenSSL-Win32" + % ./configure CC=./build-aux/cccl LD="`which link`" \ + LIBS="-lws2_32 -liphlpapi" --prefix="C:/openvswitch/usr" \ + --localstatedir="C:/openvswitch/var" --sysconfdir="C:/openvswitch/etc" \ + --with-pthread="C:/pthread" --enable-ssl --with-openssl="C:/OpenSSL-Win32" * Run make for the ported executables. 
@@ -131,11 +132,11 @@ level 'make' will invoke building the kernel datapath, if the '--with-vstudioddk' argument is specified while configuring the package. For example, - % ./configure CC=./build-aux/cccl LD="`which link`" LIBS="-lws2_32" \ - --prefix="C:/openvswitch/usr" --localstatedir="C:/openvswitch/var" \ - --sysconfdir="C:/openvswitch/etc" --with-pthread="C:/pthread" \ - --enable-ssl --with-openssl="C:/OpenSSL-Win32" \ - --with-vstudioddk="" + % ./configure CC=./build-aux/cccl LD="`which link`" \ + LIBS="-lws2_32 -liphlpapi" --prefix="C:/openvswitch/usr" \ + --localstatedir="C:/openvswitch/var" --sysconfdir="C:/openvswitch/etc" \ + --with-pthread="C:/pthread" --enable-ssl \ + --with-openssl="C:/OpenSSL-Win32" --with-vstudioddk="" Possible values for "" are: "Win8.1 Debug", "Win8.1 Release", "Win8 Debug" and "Win8 Release". diff --git a/lib/netdev-windows.c b/lib/netdev-windows.c index 1fc1da710..1eb872705 100644 --- a/lib/netdev-windows.c +++ b/lib/netdev-windows.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -373,6 +374,117 @@ netdev_windows_update_flags(struct netdev *netdev_, return 0; } +/* Looks up in the ARP table entry for a given 'ip'. If it is found, the + * corresponding MAC address will be copied in 'mac' and return 0. If no + * matching entry is found or an error occurs it will log it and return ENXIO. 
+ */ +static int +netdev_windows_arp_lookup(const struct netdev *netdev, + ovs_be32 ip, uint8_t mac[ETH_ADDR_LEN]) +{ + PMIB_IPNETTABLE arp_table = NULL; + /* The buffer length of all ARP entries */ + uint32_t buffer_length = 0; + uint32_t ret_val = 0; + uint32_t counter = 0; + + ret_val = GetIpNetTable(arp_table, &buffer_length, false); + + if (ret_val != ERROR_INSUFFICIENT_BUFFER ) { + VLOG_ERR("Call to GetIpNetTable failed with error: %s", + ovs_format_message(ret_val)); + return ENXIO; + } + + arp_table = (MIB_IPNETTABLE *) malloc(buffer_length); + + if (arp_table == NULL) { + VLOG_ERR("Could not allocate memory for all the interfaces"); + return ENXIO; + } + + ret_val = GetIpNetTable(arp_table, &buffer_length, false); + + if (ret_val == NO_ERROR) { + for (counter = 0; counter < arp_table->dwNumEntries; counter++) { + if (arp_table->table[counter].dwAddr == ip) { + memcpy(mac, arp_table->table[counter].bPhysAddr, ETH_ADDR_LEN); + + free(arp_table); + return 0; + } + } + } else { + VLOG_ERR("Call to GetIpNetTable failed with error: %s", + ovs_format_message(ret_val)); + } + + free(arp_table); + return ENXIO; +} + +static int +netdev_windows_get_next_hop(const struct in_addr *host, + struct in_addr *next_hop, + char **netdev_name) +{ + uint32_t ret_val = 0; + /* The buffer length of all addresses */ + uint32_t buffer_length = 1000; + PIP_ADAPTER_ADDRESSES all_addr = NULL; + PIP_ADAPTER_ADDRESSES cur_addr = NULL; + + ret_val = GetAdaptersAddresses(AF_INET, + GAA_FLAG_INCLUDE_PREFIX | + GAA_FLAG_INCLUDE_GATEWAYS, + NULL, all_addr, &buffer_length); + + if (ret_val != ERROR_INSUFFICIENT_BUFFER ) { + VLOG_ERR("Call to GetAdaptersAddresses failed with error: %s", + ovs_format_message(ret_val)); + return ENXIO; + } + + all_addr = (IP_ADAPTER_ADDRESSES *) malloc(buffer_length); + + if (all_addr == NULL) { + VLOG_ERR("Could not allocate memory for all the interfaces"); + return ENXIO; + } + + ret_val = GetAdaptersAddresses(AF_INET, + GAA_FLAG_INCLUDE_PREFIX | + 
GAA_FLAG_INCLUDE_GATEWAYS, + NULL, all_addr, &buffer_length); + + if (ret_val == NO_ERROR) { + cur_addr = all_addr; + while (cur_addr) { + if(cur_addr->FirstGatewayAddress && + cur_addr->FirstGatewayAddress->Address.lpSockaddr) { + struct sockaddr_in *ipv4 = (struct sockaddr_in *) + cur_addr->FirstGatewayAddress->Address.lpSockaddr; + next_hop->s_addr = ipv4->sin_addr.S_un.S_addr; + *netdev_name = xstrdup((char *)cur_addr->FriendlyName); + + free(all_addr); + + return 0; + } + + cur_addr = cur_addr->Next; + } + } else { + VLOG_ERR("Call to GetAdaptersAddresses failed with error: %s", + ovs_format_message(ret_val)); + } + + if (all_addr) { + free(all_addr); + } + return ENXIO; +} + static int netdev_windows_internal_construct(struct netdev *netdev_) { @@ -390,6 +502,8 @@ netdev_windows_internal_construct(struct netdev *netdev_) .get_etheraddr = netdev_windows_get_etheraddr, \ .set_etheraddr = netdev_windows_set_etheraddr, \ .update_flags = netdev_windows_update_flags, \ + .get_next_hop = netdev_windows_get_next_hop, \ + .arp_lookup = netdev_windows_arp_lookup, \ } const struct netdev_class netdev_windows_class = From 05444f07247388d5537b99d6849af31b034fcc9f Mon Sep 17 00:00:00 2001 From: Gurucharan Shetty Date: Mon, 25 May 2015 00:50:01 -0700 Subject: [PATCH 045/146] ovs-docker: Add the ability to set the mac address. For testing OVN, it is useful to set the mac address of the container. Since ovs-docker hasn't been part of any released versions of OVS, it is probably OK to change the options style. Signed-off-by: Gurucharan Shetty --- INSTALL.Docker.md | 8 ++++++-- utilities/ovs-docker | 44 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/INSTALL.Docker.md b/INSTALL.Docker.md index 6bd566024..b505a9b55 100644 --- a/INSTALL.Docker.md +++ b/INSTALL.Docker.md @@ -55,9 +55,13 @@ and then attaches it to the Open vSwitch bridge 'br-int'. This is done by creating a veth pair. 
One end of the interface becomes 'eth1' inside the container and the other end attaches to 'br-int'. -The script also lets one to add an IP address to the interface. e.g.: +The script also lets one to add IP address, MAC address and Gateway address to +the interface. e.g.: -`% ovs-docker add-port br-int eth1 $CONTAINER_ID 192.168.1.1/24` +``` +% ovs-docker add-port br-int eth1 $CONTAINER_ID --ipaddress=192.168.1.2/24 \ +--macaddress=a2:c3:0d:49:7f:f8 --gateway=192.168.1.1 +``` * A previously added network interface can be deleted. e.g.: diff --git a/utilities/ovs-docker b/utilities/ovs-docker index dd2aef1c6..12c324697 100755 --- a/utilities/ovs-docker +++ b/utilities/ovs-docker @@ -65,14 +65,34 @@ add_port () { BRIDGE="$1" INTERFACE="$2" CONTAINER="$3" - ADDRESS="$4" - GATEWAY="$5" - if [ "$#" -lt 3 ]; then - usage + if [ -z "$BRIDGE" ] || [ -z "$INTERFACE" ] || [ -z "$CONTAINER" ]; then + echo >&2 "$UTIL add-port: not enough arguments (use --help for help)" exit 1 fi + shift 3 + while [ $# -ne 0 ]; do + case $1 in + --ipaddress=*) + ADDRESS=`expr X"$1" : 'X[^=]*=\(.*\)'` + shift + ;; + --macaddress=*) + MACADDRESS=`expr X"$1" : 'X[^=]*=\(.*\)'` + shift + ;; + --gateway=*) + GATEWAY=`expr X"$1" : 'X[^=]*=\(.*\)'` + shift + ;; + *) + echo >&2 "$UTIL add-port: unknown option \"$1\"" + exit 1 + ;; + esac + done + # Check if a port is already attached for the given container and interface PORT=`get_port_for_container_interface "$CONTAINER" "$INTERFACE" \ 2>/dev/null` @@ -121,6 +141,10 @@ add_port () { ip netns exec "$PID" ip addr add "$ADDRESS" dev "$INTERFACE" fi + if [ -n "$MACADDRESS" ]; then + ip netns exec "$PID" ip link set dev "$INTERFACE" address "$MACADDRESS" + fi + if [ -n "$GATEWAY" ]; then ip netns exec "$PID" ip route add default via "$GATEWAY" fi @@ -190,14 +214,16 @@ ${UTIL}: Performs integration of Open vSwitch with Docker. 
usage: ${UTIL} COMMAND Commands: - add-port BRIDGE INTERFACE CONTAINER [ADDRESS [GATEWAY]] + add-port BRIDGE INTERFACE CONTAINER [--ipaddress="ADDRESS"] + [--gateway=GATEWAY] [--macaddress="MACADDRESS"] Adds INTERFACE inside CONTAINER and connects it as a port in Open vSwitch BRIDGE. Optionally, sets ADDRESS on INTERFACE. ADDRESS can include a '/' to represent network - prefix length. Along with ADDRESS, optionally set the - default gateway for the container. e.g.: - ${UTIL} add-port br-int eth1 c474a0e2830e 192.168.1.2/24 \ - 192.168.1.1 + prefix length. Optionally, sets a GATEWAY and a MACADDRESS. + e.g.: + ${UTIL} add-port br-int eth1 c474a0e2830e + --ipaddress=192.168.1.2/24 --gateway=192.168.1.1 + --macaddress="a2:c3:0d:49:7f:f8" del-port BRIDGE INTERFACE CONTAINER Deletes INTERFACE inside CONTAINER and removes its connection to Open vSwitch BRIDGE. e.g.: From 6dd3cc3989bb350ffe8bbe2082e94373350ae9c4 Mon Sep 17 00:00:00 2001 From: Gurucharan Shetty Date: Mon, 25 May 2015 01:04:01 -0700 Subject: [PATCH 046/146] ovs-docker: Ability to set the MTU of the container interface. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When containers are connected to an OVS bridge and tunnels are created, it makes sense to reduce the MTU of the interface.
Reported-by: Aurélien Poulain Signed-off-by: Gurucharan Shetty --- AUTHORS | 1 + INSTALL.Docker.md | 6 +++--- utilities/ovs-docker | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/AUTHORS b/AUTHORS index 5178b4346..574fba5b8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -213,6 +213,7 @@ Anuprem Chalvadi achalvadi@vmware.com Ariel Tubaltsev atubaltsev@vmware.com Arkajit Ghosh arkajit.ghosh@tcs.com Atzm Watanabe atzm@stratosphere.co.jp +Aurélien Poulain aurepoulain@viacesi.fr Bastian Blank waldi@debian.org Ben Basler bbasler@nicira.com Bob Ball bob.ball@citrix.com diff --git a/INSTALL.Docker.md b/INSTALL.Docker.md index b505a9b55..9e140435c 100644 --- a/INSTALL.Docker.md +++ b/INSTALL.Docker.md @@ -55,12 +55,12 @@ and then attaches it to the Open vSwitch bridge 'br-int'. This is done by creating a veth pair. One end of the interface becomes 'eth1' inside the container and the other end attaches to 'br-int'. -The script also lets one to add IP address, MAC address and Gateway address to -the interface. e.g.: +The script also lets one to add IP address, MAC address, Gateway address and +MTU for the interface. e.g.: ``` % ovs-docker add-port br-int eth1 $CONTAINER_ID --ipaddress=192.168.1.2/24 \ ---macaddress=a2:c3:0d:49:7f:f8 --gateway=192.168.1.1 +--macaddress=a2:c3:0d:49:7f:f8 --gateway=192.168.1.1 --mtu=1450 ``` * A previously added network interface can be deleted.
e.g.: diff --git a/utilities/ovs-docker b/utilities/ovs-docker index 12c324697..43cea5439 100755 --- a/utilities/ovs-docker +++ b/utilities/ovs-docker @@ -86,6 +86,10 @@ add_port () { GATEWAY=`expr X"$1" : 'X[^=]*=\(.*\)'` shift ;; + --mtu=*) + MTU=`expr X"$1" : 'X[^=]*=\(.*\)'` + shift + ;; *) echo >&2 "$UTIL add-port: unknown option \"$1\"" exit 1 @@ -137,6 +141,10 @@ add_port () { ip netns exec "$PID" ip link set dev "${PORTNAME}_c" name "$INTERFACE" ip netns exec "$PID" ip link set "$INTERFACE" up + if [ -n "$MTU" ]; then + ip netns exec "$PID" ip link set dev "$INTERFACE" mtu "$MTU" + fi + if [ -n "$ADDRESS" ]; then ip netns exec "$PID" ip addr add "$ADDRESS" dev "$INTERFACE" fi @@ -216,14 +224,15 @@ usage: ${UTIL} COMMAND Commands: add-port BRIDGE INTERFACE CONTAINER [--ipaddress="ADDRESS"] [--gateway=GATEWAY] [--macaddress="MACADDRESS"] + [--mtu=MTU] Adds INTERFACE inside CONTAINER and connects it as a port in Open vSwitch BRIDGE. Optionally, sets ADDRESS on INTERFACE. ADDRESS can include a '/' to represent network - prefix length. Optionally, sets a GATEWAY and a MACADDRESS. - e.g.: + prefix length. Optionally, sets a GATEWAY, MACADDRESS + and MTU. e.g.: ${UTIL} add-port br-int eth1 c474a0e2830e --ipaddress=192.168.1.2/24 --gateway=192.168.1.1 - --macaddress="a2:c3:0d:49:7f:f8" + --macaddress="a2:c3:0d:49:7f:f8" --mtu=1450 del-port BRIDGE INTERFACE CONTAINER Deletes INTERFACE inside CONTAINER and removes its connection to Open vSwitch BRIDGE. e.g.: From 51542eddd89d4005dc99deae99c6980302174ea5 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 27 Apr 2015 23:01:09 -0300 Subject: [PATCH 047/146] rhel: Fix rundir ownership. Although the ovs-ctl/ovs-lib takes care of creating the rundir, it is correct to let systemd manage the directory and let the rpm know about the ownership too.
Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- rhel/openvswitch-fedora.spec.in | 1 + rhel/usr_lib_systemd_system_openvswitch-nonetwork.service | 2 ++ 2 files changed, 3 insertions(+) diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index f3dbebfcf..b3ad7d8ae 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -258,6 +258,7 @@ rm -rf $RPM_BUILD_ROOT %doc FAQ.md NEWS INSTALL.DPDK.md rhel/README.RHEL /var/lib/openvswitch /var/log/openvswitch +%ghost %attr(755,root,root) %{_rundir}/openvswitch %exclude %{_bindir}/ovs-benchmark %exclude %{_bindir}/ovs-parse-backtrace %exclude %{_bindir}/ovs-pcap diff --git a/rhel/usr_lib_systemd_system_openvswitch-nonetwork.service b/rhel/usr_lib_systemd_system_openvswitch-nonetwork.service index 870b25e89..e4c2a668c 100644 --- a/rhel/usr_lib_systemd_system_openvswitch-nonetwork.service +++ b/rhel/usr_lib_systemd_system_openvswitch-nonetwork.service @@ -11,3 +11,5 @@ EnvironmentFile=-/etc/sysconfig/openvswitch ExecStart=/usr/share/openvswitch/scripts/ovs-ctl start \ --system-id=random $OPTIONS ExecStop=/usr/share/openvswitch/scripts/ovs-ctl stop +RuntimeDirectory=openvswitch +RuntimeDirectoryMode=0755 From 3b4099b496acadbb47818b60a1343f819ae36be2 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 27 Apr 2015 23:00:14 -0300 Subject: [PATCH 048/146] rhel: Add buildrequires for procps-ng. The testsuite is enabled by default and uses some of the tools provided by procps-ng. 
Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- rhel/openvswitch-fedora.spec.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index b3ad7d8ae..2c6f6e939 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -35,6 +35,8 @@ BuildRequires: systemd-units openssl openssl-devel BuildRequires: python python-twisted-core python-zope-interface PyQt4 BuildRequires: desktop-file-utils BuildRequires: groff graphviz +# make check dependencies +BuildRequires: procps-ng Requires: openssl iproute module-init-tools #Upstream kernel commit 4f647e0a3c37b8d5086214128614a136064110c3 From d2843eba6d939526eb3daef511099bed2d18dd2c Mon Sep 17 00:00:00 2001 From: Gurucharan Shetty Date: Tue, 7 Apr 2015 17:34:27 -0700 Subject: [PATCH 049/146] ovs_threads: Avoid running pthread destructors from main thread exit. Windows uses pthreads-win32 library to provide the Linux pthread functionality. It is observed that when the main thread calls a pthread destructor after it exits, undefined behavior is seen (e.g., junk values in data, causing pthread deadlocks). Similar behavior has been seen by other people as seen in the following email thread: https://sourceware.org/ml/pthreads-win32/2003/msg00001.html To avoid this, this commit de-registers the thread destructor when the main thread exits (via the atexit handler). 
Signed-off-by: Gurucharan Shetty Acked-by: Ben Pfaff --- lib/ovs-rcu.c | 14 ++++++++++++++ lib/ovs-thread.c | 15 +++++++++++++++ tests/test-atomic.c | 2 ++ 3 files changed, 31 insertions(+) diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index 76659bbe0..e0634cfab 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -16,6 +16,7 @@ #include #include "ovs-rcu.h" +#include "fatal-signal.h" #include "guarded-list.h" #include "list.h" #include "ovs-thread.h" @@ -313,6 +314,18 @@ ovsrcu_thread_exit_cb(void *perthread) ovsrcu_unregister__(perthread); } +/* Cancels the callback to ovsrcu_thread_exit_cb(). + * + * Cancelling the call to the destructor during the main thread exit + * is needed while using pthreads-win32 library in Windows. It has been + * observed that in pthreads-win32, a call to the destructor during + * main thread exit causes undefined behavior. */ +static void +ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED) +{ + pthread_setspecific(perthread_key, NULL); +} + static void ovsrcu_init_module(void) { @@ -320,6 +333,7 @@ ovsrcu_init_module(void) if (ovsthread_once_start(&once)) { global_seqno = seq_create(); xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb); + fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true); list_init(&ovsrcu_threads); ovs_mutex_init(&ovsrcu_threads_mutex); diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c index 416109563..b2d05a6cb 100644 --- a/lib/ovs-thread.c +++ b/lib/ovs-thread.c @@ -24,6 +24,7 @@ #include #include #include "compiler.h" +#include "fatal-signal.h" #include "hash.h" #include "list.h" #include "netdev-dpdk.h" @@ -668,6 +669,18 @@ ovsthread_key_destruct__(void *slots_) free(slots); } +/* Cancels the callback to ovsthread_key_destruct__(). + * + * Cancelling the call to the destructor during the main thread exit + * is needed while using pthreads-win32 library in Windows. It has been + * observed that in pthreads-win32, a call to the destructor during + * main thread exit causes undefined behavior. 
*/ +static void +ovsthread_cancel_ovsthread_key_destruct__(void *aux OVS_UNUSED) +{ + pthread_setspecific(tsd_key, NULL); +} + /* Initializes '*keyp' as a thread-specific data key. The data items are * initially null in all threads. * @@ -684,6 +697,8 @@ ovsthread_key_create(ovsthread_key_t *keyp, void (*destructor)(void *)) if (ovsthread_once_start(&once)) { xpthread_key_create(&tsd_key, ovsthread_key_destruct__); + fatal_signal_add_hook(ovsthread_cancel_ovsthread_key_destruct__, + NULL, NULL, true); ovsthread_once_done(&once); } diff --git a/tests/test-atomic.c b/tests/test-atomic.c index 50b3b7a9e..2af6a2649 100644 --- a/tests/test-atomic.c +++ b/tests/test-atomic.c @@ -16,6 +16,7 @@ #include #undef NDEBUG +#include "fatal-signal.h" #include "ovs-atomic.h" #include "ovstest.h" #include "ovs-thread.h" @@ -413,6 +414,7 @@ test_atomic_seq_cst(void) static void test_atomic_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { + fatal_signal_init(); test_atomic_plain(); test_atomic_relaxed(); test_atomic_consume(); From 8c10186654f0541252da4693eaf7c9ae281240db Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 19 May 2015 14:13:12 -0700 Subject: [PATCH 050/146] extract-ofp-fields: Detect duplicate fields. Figure out if a developer accidentally defines new NXM fields using an existing number, and warn them. Useful particularly if new fields are introduced upstream while rebasing an in-progress patchset. 
Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- build-aux/extract-ofp-fields | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index f05487e1d..315552d12 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -141,7 +141,12 @@ def parse_oxms(s, prefix, n_bytes): return tuple(parse_oxm(s2.strip(), prefix, n_bytes) for s2 in s.split(',')) +match_types = dict() + + def parse_oxm(s, prefix, n_bytes): + global match_types + m = re.match('([A-Z0-9_]+)\(([0-9]+)\) since(?: OF(1\.[0-9]+) and)? v([12]\.[0-9]+)$', s) if not m: fatal("%s: syntax error parsing %s" % (s, prefix)) @@ -153,6 +158,14 @@ def parse_oxm(s, prefix, n_bytes): fatal("unknown OXM class for %s" % name) oxm_vendor, oxm_class = class_ + if class_ in match_types: + if oxm_type in match_types[class_]: + fatal("duplicate match type for %s (conflicts with %s)" % + (name, match_types[class_][oxm_type])) + else: + match_types[class_] = dict() + match_types[class_][oxm_type] = name + # Normally the oxm_length is the size of the field, but for experimenter # OXMs oxm_length also includes the 4-byte experimenter ID. oxm_length = n_bytes From 5dd63cf50445b9752709052cc5bee3dafb5968e4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 20 May 2015 16:48:57 -0700 Subject: [PATCH 051/146] ofproto-dpif: Make odp/ofp parse errors more clear. It's useful to distinguish which type of flow that the parser thinks it is parsing when we output error messages. 
Signed-off-by: Joe Stringer Acked-by: Andy Zhou --- ofproto/ofproto-dpif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d151bb7d5..b11938420 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -4634,7 +4634,7 @@ parse_flow_and_packet(int argc, const char *argv[], } if (odp_flow_key_to_flow(odp_key.data, odp_key.size, flow) == ODP_FIT_ERROR) { - error = "Failed to parse flow key"; + error = "Failed to parse datapath flow key"; goto exit; } @@ -4651,7 +4651,7 @@ parse_flow_and_packet(int argc, const char *argv[], char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL); if (err) { - m_err = xasprintf("Bad flow syntax: %s", err); + m_err = xasprintf("Bad openflow flow syntax: %s", err); free(err); goto exit; } else { From eb731b767babf02bbe7fb8b9d35f9da30c8796cb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 20 May 2015 17:04:33 -0700 Subject: [PATCH 052/146] odp-util: Skip UFID when parsing datapath key. Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/odp-util.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 4845d28ef..e6c1070f7 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -51,6 +51,8 @@ VLOG_DEFINE_THIS_MODULE(odp_util); * from another. 
*/ static const char *delimiters = ", \t\r\n"; +static const char *hex_chars = "0123456789abcdefABCDEF"; + static int parse_odp_key_mask_attr(const char *, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); static void format_odp_key_attr(const struct nlattr *a, @@ -2078,7 +2080,7 @@ odp_ufid_from_string(const char *s_, ovs_u128 *ufid) s += 2; } - n = strspn(s, "0123456789abcdefABCDEF"); + n = strspn(s, hex_chars); if (n != 32) { return -EINVAL; } @@ -2714,6 +2716,17 @@ static int parse_odp_key_mask_attr(const char *s, const struct simap *port_names, struct ofpbuf *key, struct ofpbuf *mask) { + if (!strncmp(s, "ufid:", 5)) { + const char *start = s; + + /* Skip UFID. */ + s += 5; + s += strspn(s, hex_chars); + s += strspn(s, delimiters); + + return s - start; + } + SCAN_SINGLE("skb_priority(", uint32_t, u32, OVS_KEY_ATTR_PRIORITY); SCAN_SINGLE("skb_mark(", uint32_t, u32, OVS_KEY_ATTR_SKB_MARK); SCAN_SINGLE_FULLY_MASKED("recirc_id(", uint32_t, u32, From e063a73a9b20aff9611c631d3773f6e21b35325e Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 20 May 2015 10:35:15 -0700 Subject: [PATCH 053/146] tests: Fix in_port(name) test for ofproto/trace. Commit c2a77f33ade (tests/ofproto-dpif: Use vlog to test dpif behaviour.) mistakenly changed the test which checked that ovs-dpctl accepts named ports as input. Restore the name to the test. Reported-by: Gurucharan Shetty Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- tests/ofproto-dpif.at | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 139dfdd6b..f3a660b95 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -4155,7 +4155,7 @@ in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -odp_flow="in_port(1)" +odp_flow="in_port(p1)" br_flow="in_port=1" # Test command: ofproto/trace odp_flow with in_port as a name. 
AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) From b6ec827fe0edb5478ac0f580eaa0597b1166a0fb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 20 May 2015 13:30:55 -0700 Subject: [PATCH 054/146] tests: Check that ofproto/trace accepts dpctl output. Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- tests/ofproto-dpif.at | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index f3a660b95..b5a9ad917 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -4402,6 +4402,54 @@ ovs-appctl: ovs-vswitchd: server returned an error OVS_VSWITCHD_STOP AT_CLEANUP +# The third test checks that the output of "ovs-dpctl -m" is valid to trace. +AT_SETUP([ofproto-dpif - ofproto/trace from dpctl output]) +OVS_VSWITCHD_START([dnl + set Open_vSwitch . other_config:max-idle=10000 \ + -- add-port br0 p1 -- set Interface p1 type=dummy]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl revalidator/wait +AT_CHECK([ovs-appctl dpif/dump-flows -m br0 | sed 's/, packets.*$//' > dp_flows1.txt]) + +odp_flow=`cat dp_flows1.txt` +AT_CHECK([ovs-appctl ofproto/trace "$odp_flow" | sed 's/\([[Ff]]low:\).*/\1 /'], [0], [dnl +Bridge: br0 +Flow: +No match, packets dropped because OFPPC_NO_PACKET_IN is set on in_port. 
+ +Rule: table=254 cookie=0 priority=0,reg0=0x2 +OpenFlow actions=drop + +Final flow: +Megaflow: +Datapath actions: drop +]) + +dnl Now, try again without megaflows: +ovs-appctl upcall/disable-megaflows + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl revalidator/wait +AT_CHECK([ovs-appctl dpif/dump-flows -m br0 | sed 's/, packets.*$//' > dp_flows2.txt]) + +odp_flow=`cat dp_flows2.txt` +AT_CHECK([ovs-appctl ofproto/trace "$odp_flow" | sed 's/\([[Ff]]low:\).*/\1 /'], [0], [dnl +Bridge: br0 +Flow: +No match, packets dropped because OFPPC_NO_PACKET_IN is set on in_port. + +Rule: table=254 cookie=0 priority=0,reg0=0x2 +OpenFlow actions=drop + +Final flow: +Megaflow: +Datapath actions: drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - ofproto/trace-packet-out]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3) From 5e82ceefd12dfcb954da48d027b47d98dc53676a Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Wed, 27 May 2015 16:58:25 +0000 Subject: [PATCH 055/146] datapath-windows: Support for custom VXLAN tunnel port The kernel datapath supports only port 4789 for VXLAN tunnel creation. Added support in order to allow for the VXLAN tunnel port to be configurable to any port number set by the userspace. The patch also checks to see if an existing WFP filter, for the necessary UDP tunnel port, is already created before adding a new one. This is a double check, because currently the userspace also verifies this, but it is necessary to avoid future issues. Custom VXLAN tunnel port requires the addition of a new WFP filter with the new UDP tunnel port. The creation of a new WFP filter is triggered in OvsInitVxlanTunnel function and the removal of the WFP filter in OvsCleanupVxlanTunnel function. 
However, both of these functions run at IRQL = DISPATCH_LEVEL, due to the NDIS RW lock acquisition, whereas all WFP calls must run at IRQL = PASSIVE_LEVEL. This is why I have created a system thread: all filter addition/removal requests are recorded into a list for later processing by that thread. The thread's start routine processes all received requests at IRQL = PASSIVE_LEVEL, which is the IRQL required by the WFP calls that add or remove WFP filters. The WFP filter for the default VXLAN port 4789 is no longer added at filter attach. All WFP filters for the tunnel ports are added when the tunnel ports are initialized and are removed at cleanup. The WFP operation status is then reported to userspace. The OvsTunnelFilterUninitialize function must be called after OvsClearAllSwitchVports so that the added WFP filters can be removed: OvsTunnelFilterUninitialize closes the global engine handle used by most of the WFP calls, including filter removal.
Signed-off-by: Sorin Vinturis Reported-by: Alin Gabriel Serdean Reported-at: https://github.com/openvswitch/ovs-issues/issues/66 Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/Datapath.c | 8 +- datapath-windows/ovsext/Netlink/Netlink.c | 2 + .../ovsext/Netlink/NetlinkError.h | 19 +- datapath-windows/ovsext/Switch.c | 2 +- datapath-windows/ovsext/Tunnel.h | 6 - datapath-windows/ovsext/TunnelFilter.c | 881 ++++++++++++++---- datapath-windows/ovsext/TunnelIntf.h | 15 + datapath-windows/ovsext/Vport.c | 455 +++++++-- datapath-windows/ovsext/Vport.h | 12 +- datapath-windows/ovsext/Vxlan.c | 102 +- datapath-windows/ovsext/Vxlan.h | 13 +- 11 files changed, 1231 insertions(+), 284 deletions(-) diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c index 7646f0a91..185bfb53c 100644 --- a/datapath-windows/ovsext/Datapath.c +++ b/datapath-windows/ovsext/Datapath.c @@ -670,7 +670,6 @@ OvsCleanupDevice(PDEVICE_OBJECT deviceObject, return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); } - /* * -------------------------------------------------------------------------- * IOCTL function handler for the device. @@ -925,9 +924,12 @@ exit: KeMemoryBarrier(); instance->inUse = 0; - /* Should not complete a pending IRP unless proceesing is completed */ + /* Should not complete a pending IRP unless proceesing is completed. */ if (status == STATUS_PENDING) { - return status; + /* STATUS_PENDING is returned by the NL handler when the request is + * to be processed later, so we mark the IRP as pending and complete + * it in another thread when the request is processed. 
*/ + IoMarkIrpPending(irp); } return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status); } diff --git a/datapath-windows/ovsext/Netlink/Netlink.c b/datapath-windows/ovsext/Netlink/Netlink.c index 589e3a1f3..a62d76091 100644 --- a/datapath-windows/ovsext/Netlink/Netlink.c +++ b/datapath-windows/ovsext/Netlink/Netlink.c @@ -112,6 +112,8 @@ NlBuildErrorMsg(POVS_MESSAGE msgIn, POVS_MESSAGE_ERROR msgError, UINT errorCode) { NL_BUFFER nlBuffer; + ASSERT(errorCode != NL_ERROR_PENDING); + NlBufInit(&nlBuffer, (PCHAR)msgError, sizeof *msgError); NlFillNlHdr(&nlBuffer, NLMSG_ERROR, 0, msgIn->nlMsg.nlmsgSeq, msgIn->nlMsg.nlmsgPid); diff --git a/datapath-windows/ovsext/Netlink/NetlinkError.h b/datapath-windows/ovsext/Netlink/NetlinkError.h index 53c935ff2..eefa89e34 100644 --- a/datapath-windows/ovsext/Netlink/NetlinkError.h +++ b/datapath-windows/ovsext/Netlink/NetlinkError.h @@ -195,14 +195,16 @@ typedef enum _NL_ERROR_ NL_ERROR_TIMEDOUT = ((ULONG)-138), /* The given text file is busy */ NL_ERROR_TXTBSY = ((ULONG)-139), - /*the operation would block */ + /* The operation would block */ NL_ERROR_WOULDBLOCK = ((ULONG)-140), + /* The operation is not finished */ + NL_ERROR_PENDING = ((ULONG)-141), } NL_ERROR; static __inline NlMapStatusToNlErr(NTSTATUS status) { - NL_ERROR ret = NL_ERROR_INVAL; + NL_ERROR ret; switch (status) { @@ -215,7 +217,20 @@ NlMapStatusToNlErr(NTSTATUS status) case STATUS_SUCCESS: ret = NL_ERROR_SUCCESS; break; + case STATUS_PENDING: + ret = NL_ERROR_PENDING; + break; + case STATUS_CANCELLED: + ret = NL_ERROR_CANCELED; + break; + case STATUS_INVALID_PARAMETER: + ret = NL_ERROR_INVAL; + break; + case STATUS_OBJECT_NAME_EXISTS: + ret = NL_ERROR_EXIST; + break; default: + ret = NL_ERROR_OTHER; break; } diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c index 032153d39..416bcc03f 100644 --- a/datapath-windows/ovsext/Switch.c +++ b/datapath-windows/ovsext/Switch.c @@ -263,8 +263,8 @@ OvsDeleteSwitch(POVS_SWITCH_CONTEXT 
switchContext) if (switchContext) { dpNo = switchContext->dpNo; - OvsUninitTunnelFilter(gOvsExtDriverObject); OvsClearAllSwitchVports(switchContext); + OvsUninitTunnelFilter(gOvsExtDriverObject); OvsUninitSwitchContext(switchContext); } OVS_LOG_TRACE("Exit: deleted switch %p dpNo: %d", switchContext, dpNo); diff --git a/datapath-windows/ovsext/Tunnel.h b/datapath-windows/ovsext/Tunnel.h index 2978bb395..2c45e35fd 100644 --- a/datapath-windows/ovsext/Tunnel.h +++ b/datapath-windows/ovsext/Tunnel.h @@ -32,12 +32,6 @@ typedef struct OVS_TUNNEL_PENDED_PACKET_ FWPS_CLASSIFY_OUT *classifyOut; } OVS_TUNNEL_PENDED_PACKET; -/* Shared global data. */ - -extern UINT16 configNewDestPort; - -extern UINT32 gCalloutIdV4; - // // Shared function prototypes // diff --git a/datapath-windows/ovsext/TunnelFilter.c b/datapath-windows/ovsext/TunnelFilter.c index c2186eb42..a47e0aa5b 100644 --- a/datapath-windows/ovsext/TunnelFilter.c +++ b/datapath-windows/ovsext/TunnelFilter.c @@ -48,27 +48,24 @@ /* Infinite timeout */ #define INFINITE 0xFFFFFFFF -/* - * The provider name should always match the provider string from the install - * file. - */ +/* The provider name should always match the provider string from the install + * file. */ #define OVS_TUNNEL_PROVIDER_NAME L"Open vSwitch" -/* - * The provider description should always contain the OVS service description - * string from the install file. - */ +/* The provider description should always contain the OVS service description + * string from the install file. */ #define OVS_TUNNEL_PROVIDER_DESC L"Open vSwitch Extension tunnel provider" /* The session name isn't required but it's useful for diagnostics. */ #define OVS_TUNNEL_SESSION_NAME L"OVS tunnel session" -/* Configurable parameters (addresses and ports are in host order) */ -UINT16 configNewDestPort = VXLAN_UDP_PORT; +/* Maximum number of tunnel threads to be created. 
*/ +#define OVS_TUNFLT_MAX_THREADS 8 /* * Callout and sublayer GUIDs */ + // b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 DEFINE_GUID( OVS_TUNNEL_CALLOUT_V4, @@ -105,42 +102,109 @@ DEFINE_GUID( 0xa5, 0x36, 0x1e, 0xed, 0xb9, 0xe9, 0xba, 0x6a ); +/* + * Callout driver type definitions + */ +typedef enum _OVS_TUNFLT_OPERATION { + OVS_TUN_FILTER_CREATE = 0, + OVS_TUN_FILTER_DELETE +} OVS_TUNFLT_OPERATION; + +typedef struct _OVS_TUNFLT_REQUEST { + LIST_ENTRY entry; + /* Tunnel filter destination port. */ + UINT16 port; + /* XXX: We also need to specify the tunnel L4 protocol, because there are + * different protocols that can use the same destination port.*/ + union { + /* Tunnel filter identification used for filter deletion. */ + UINT64 delID; + /* Pointer used to return filter ID to the caller on filter creation. */ + PUINT64 addID; + }filterID; + /* Requested operation to be performed. */ + OVS_TUNFLT_OPERATION operation; + /* Current I/O request to be completed when requested + * operation is finished. */ + PIRP irp; + /* Callback function called before completing the IRP. */ + PFNTunnelVportPendingOp callback; + /* Context passed to the callback function. */ + PVOID context; +} OVS_TUNFLT_REQUEST, *POVS_TUNFLT_REQUEST; + +typedef struct _OVS_TUNFLT_REQUEST_LIST { + /* SpinLock for syncronizing access to the requests list. */ + NDIS_SPIN_LOCK spinlock; + /* Head of the requests list. */ + LIST_ENTRY head; + /* Number of requests in the list. This variable is used by + * InterlockedCompareExchange function and needs to be aligned + * at 32-bit boundaries. */ + UINT32 numEntries; +} OVS_TUNFLT_REQUEST_LIST, *POVS_TUNFLT_REQUEST_LIST; + +typedef struct _OVS_TUNFLT_THREAD_CONTEXT { + /* Thread identification. */ + UINT threadID; + /* Thread's engine session handle. */ + HANDLE engineSession; + /* Reference of the thread object. */ + PVOID threadObject; + /* Requests queue list. */ + OVS_TUNFLT_REQUEST_LIST listRequests; + /* Event signaling that there are requests to process. 
*/ + KEVENT requestEvent; + /* Event for stopping thread execution. */ + KEVENT stopEvent; +} OVS_TUNFLT_THREAD_CONTEXT, *POVS_TUNFLT_THREAD_CONTEXT; + +KSTART_ROUTINE OvsTunnelFilterThreadProc; + +static NTSTATUS OvsTunnelFilterStartThreads(); +static NTSTATUS OvsTunnelFilterThreadStart(POVS_TUNFLT_THREAD_CONTEXT threadCtx); +static VOID OvsTunnelFilterStopThreads(); +static VOID OvsTunnelFilterThreadStop(POVS_TUNFLT_THREAD_CONTEXT threadCtx, + BOOLEAN signalEvent); +static NTSTATUS OvsTunnelFilterThreadInit(POVS_TUNFLT_THREAD_CONTEXT threadCtx); +static VOID OvsTunnelFilterThreadUninit(POVS_TUNFLT_THREAD_CONTEXT threadCtx); + /* * Callout driver global variables */ -PDEVICE_OBJECT gDeviceObject; -HANDLE gEngineHandle = NULL; -HANDLE gTunnelProviderBfeHandle = NULL; -HANDLE gTunnelInitBfeHandle = NULL; -UINT32 gCalloutIdV4; +static PDEVICE_OBJECT gDeviceObject = NULL; +static HANDLE gEngineHandle = NULL; +static HANDLE gTunnelProviderBfeHandle = NULL; +static HANDLE gTunnelInitBfeHandle = NULL; +static HANDLE gBfeSubscriptionHandle = NULL; +static UINT32 gCalloutIdV4 = 0; +static OVS_TUNFLT_THREAD_CONTEXT gTunnelThreadCtx[OVS_TUNFLT_MAX_THREADS] = { 0 }; - -/* Callout driver implementation */ +/* + * Callout driver implementation. + */ NTSTATUS -OvsTunnelEngineOpen(HANDLE *handle) +OvsTunnelEngineOpen(HANDLE *engineSession) { NTSTATUS status = STATUS_SUCCESS; FWPM_SESSION session = { 0 }; - /* The session name isn't required but may be useful for diagnostics. */ - session.displayData.name = OVS_TUNNEL_SESSION_NAME; /* * Set an infinite wait timeout, so we don't have to handle FWP_E_TIMEOUT * errors while waiting to acquire the transaction lock. */ session.txnWaitTimeoutInMSec = INFINITE; - session.flags = FWPM_SESSION_FLAG_DYNAMIC; /* The authentication service should always be RPC_C_AUTHN_DEFAULT. 
*/ status = FwpmEngineOpen(NULL, RPC_C_AUTHN_DEFAULT, NULL, &session, - handle); + engineSession); if (!NT_SUCCESS(status)) { - OVS_LOG_ERROR("Fail to open filtering engine session, status: %x.", + OVS_LOG_ERROR("Failed to open filtering engine session, status: %x.", status); } @@ -148,23 +212,23 @@ OvsTunnelEngineOpen(HANDLE *handle) } VOID -OvsTunnelEngineClose(HANDLE *handle) +OvsTunnelEngineClose(HANDLE *engineSession) { - if (*handle) { - FwpmEngineClose(*handle); - *handle = NULL; + if (*engineSession) { + FwpmEngineClose(*engineSession); + *engineSession = NULL; } } VOID -OvsTunnelAddSystemProvider(HANDLE handle) +OvsTunnelAddSystemProvider(HANDLE engineSession) { NTSTATUS status = STATUS_SUCCESS; BOOLEAN inTransaction = FALSE; FWPM_PROVIDER provider = { 0 }; do { - status = FwpmTransactionBegin(handle, 0); + status = FwpmTransactionBegin(engineSession, 0); if (!NT_SUCCESS(status)) { break; } @@ -180,7 +244,7 @@ OvsTunnelAddSystemProvider(HANDLE handle) */ provider.flags = FWPM_PROVIDER_FLAG_PERSISTENT; - status = FwpmProviderAdd(handle, + status = FwpmProviderAdd(engineSession, &provider, NULL); if (!NT_SUCCESS(status)) { @@ -191,7 +255,7 @@ OvsTunnelAddSystemProvider(HANDLE handle) } } - status = FwpmTransactionCommit(handle); + status = FwpmTransactionCommit(engineSession); if (!NT_SUCCESS(status)) { break; } @@ -200,30 +264,30 @@ OvsTunnelAddSystemProvider(HANDLE handle) } while (inTransaction); if (inTransaction){ - FwpmTransactionAbort(handle); + FwpmTransactionAbort(engineSession); } } VOID -OvsTunnelRemoveSystemProvider(HANDLE handle) +OvsTunnelRemoveSystemProvider(HANDLE engineSession) { NTSTATUS status = STATUS_SUCCESS; BOOLEAN inTransaction = FALSE; do { - status = FwpmTransactionBegin(handle, 0); + status = FwpmTransactionBegin(engineSession, 0); if (!NT_SUCCESS(status)) { break; } inTransaction = TRUE; - status = FwpmProviderDeleteByKey(handle, + status = FwpmProviderDeleteByKey(engineSession, &OVS_TUNNEL_PROVIDER_KEY); if (!NT_SUCCESS(status)) 
{ break; } - status = FwpmTransactionCommit(handle); + status = FwpmTransactionCommit(engineSession); if (!NT_SUCCESS(status)) { break; } @@ -232,29 +296,30 @@ OvsTunnelRemoveSystemProvider(HANDLE handle) } while (inTransaction); if (inTransaction){ - FwpmTransactionAbort(handle); + FwpmTransactionAbort(engineSession); } } NTSTATUS -OvsTunnelAddFilter(PWSTR filterName, +OvsTunnelAddFilter(HANDLE engineSession, + PWSTR filterName, const PWSTR filterDesc, USHORT remotePort, FWP_DIRECTION direction, UINT64 context, const GUID *filterKey, const GUID *layerKey, - const GUID *calloutKey) + const GUID *calloutKey, + UINT64 *filterID) { NTSTATUS status = STATUS_SUCCESS; FWPM_FILTER filter = {0}; FWPM_FILTER_CONDITION filterConditions[3] = {0}; UINT conditionIndex; - UNREFERENCED_PARAMETER(remotePort); - UNREFERENCED_PARAMETER(direction); - - filter.filterKey = *filterKey; + if (filterKey) { + filter.filterKey = *filterKey; + } filter.layerKey = *layerKey; filter.displayData.name = (wchar_t*)filterName; filter.displayData.description = (wchar_t*)filterDesc; @@ -284,64 +349,18 @@ OvsTunnelAddFilter(PWSTR filterName, filter.numFilterConditions = conditionIndex; - status = FwpmFilterAdd(gEngineHandle, + status = FwpmFilterAdd(engineSession, &filter, NULL, - NULL); + filterID); return status; } -NTSTATUS -OvsTunnelRemoveFilter(const GUID *filterKey, - const GUID *sublayerKey) -{ - NTSTATUS status = STATUS_SUCCESS; - BOOLEAN inTransaction = FALSE; - - do { - status = FwpmTransactionBegin(gEngineHandle, 0); - if (!NT_SUCCESS(status)) { - break; - } - - inTransaction = TRUE; - - /* - * We have to delete the filter first since it references the - * sublayer. If we tried to delete the sublayer first, it would fail - * with FWP_ERR_IN_USE. 
- */ - status = FwpmFilterDeleteByKey(gEngineHandle, - filterKey); - if (!NT_SUCCESS(status)) { - break; - } - - status = FwpmSubLayerDeleteByKey(gEngineHandle, - sublayerKey); - if (!NT_SUCCESS(status)) { - break; - } - - status = FwpmTransactionCommit(gEngineHandle); - if (!NT_SUCCESS(status)){ - break; - } - - inTransaction = FALSE; - } while (inTransaction); - - if (inTransaction) { - FwpmTransactionAbort(gEngineHandle); - } - return status; -} - /* * -------------------------------------------------------------------------- - * This function registers callouts and filters that intercept UDP traffic at - * WFP FWPM_LAYER_DATAGRAM_DATA_V4 + * This function registers callouts for intercepting UDP traffic at WFP + * FWPM_LAYER_DATAGRAM_DATA_V4 layer. * -------------------------------------------------------------------------- */ NTSTATUS @@ -368,10 +387,7 @@ OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey, sCallout.flags = FWP_CALLOUT_FLAG_CONDITIONAL_ON_FLOW; #endif - status = FwpsCalloutRegister(deviceObject, - &sCallout, - calloutId); - + status = FwpsCalloutRegister(deviceObject, &sCallout, calloutId); if (!NT_SUCCESS(status)) { goto Exit; } @@ -384,24 +400,11 @@ OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey, mCallout.displayData = displayData; mCallout.applicableLayer = *layerKey; - status = FwpmCalloutAdd(gEngineHandle, - &mCallout, - NULL, - NULL); - + status = FwpmCalloutAdd(gEngineHandle, &mCallout, NULL, NULL); if (!NT_SUCCESS(status)) { goto Exit; } - status = OvsTunnelAddFilter(L"Datagram-Data OVS Filter (Inbound)", - L"address/port for UDP", - configNewDestPort, - FWP_DIRECTION_INBOUND, - 0, - &OVS_TUNNEL_FILTER_KEY, - layerKey, - calloutKey); - Exit: if (!NT_SUCCESS(status)){ @@ -416,24 +419,16 @@ Exit: /* * -------------------------------------------------------------------------- - * This function registers dynamic callouts and filters that intercept UDP - * Callouts and filters will be removed during De-Initialize. 
+ * This function registers non-dynamic callouts for intercepting UDP traffic. + * Callouts will be removed during un-initializing phase. * -------------------------------------------------------------------------- */ NTSTATUS OvsTunnelRegisterCallouts(VOID *deviceObject) { - NTSTATUS status = STATUS_SUCCESS; - FWPM_SUBLAYER OvsTunnelSubLayer; - - BOOLEAN engineOpened = FALSE; - BOOLEAN inTransaction = FALSE; - - status = OvsTunnelEngineOpen(&gEngineHandle); - if (!NT_SUCCESS(status)) { - goto Exit; - } - engineOpened = TRUE; + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN inTransaction = FALSE; + FWPM_SUBLAYER OvsTunnelSubLayer; status = FwpmTransactionBegin(gEngineHandle, 0); if (!NT_SUCCESS(status)) { @@ -476,22 +471,17 @@ Exit: if (inTransaction) { FwpmTransactionAbort(gEngineHandle); } - if (engineOpened) { - OvsTunnelEngineClose(&gEngineHandle); - } } return status; } VOID -OvsTunnelUnregisterCallouts(VOID) +OvsTunnelUnregisterCallouts() { - OvsTunnelRemoveFilter(&OVS_TUNNEL_FILTER_KEY, - &OVS_TUNNEL_SUBLAYER); FwpsCalloutUnregisterById(gCalloutIdV4); + FwpmSubLayerDeleteByKey(gEngineHandle, &OVS_TUNNEL_SUBLAYER); FwpmCalloutDeleteById(gEngineHandle, gCalloutIdV4); - OvsTunnelEngineClose(&gEngineHandle); } VOID @@ -499,16 +489,22 @@ OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject) { UNREFERENCED_PARAMETER(driverObject); + OvsTunnelFilterStopThreads(); + OvsTunnelUnregisterCallouts(); - IoDeleteDevice(gDeviceObject); + OvsTunnelEngineClose(&gEngineHandle); + + if (gDeviceObject) { + IoDeleteDevice(gDeviceObject); + } } NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject) { - NTSTATUS status = STATUS_SUCCESS; - UNICODE_STRING deviceName; + NTSTATUS status = STATUS_SUCCESS; + UNICODE_STRING deviceName; RtlInitUnicodeString(&deviceName, L"\\Device\\OvsTunnelFilter"); @@ -521,22 +517,32 @@ OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject) FALSE, &gDeviceObject); + if (!NT_SUCCESS(status)){ + OVS_LOG_ERROR("Failed to create tunnel filter 
device, status: %x.", + status); + goto Exit; + } + + status = OvsTunnelFilterStartThreads(); + if (!NT_SUCCESS(status)){ + goto Exit; + } + + status = OvsTunnelEngineOpen(&gEngineHandle); if (!NT_SUCCESS(status)){ goto Exit; } status = OvsTunnelRegisterCallouts(gDeviceObject); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to register callout, status: %x.", + status); + } Exit: if (!NT_SUCCESS(status)){ - if (gEngineHandle != NULL) { - OvsTunnelUnregisterCallouts(); - } - - if (gDeviceObject) { - IoDeleteDevice(gDeviceObject); - } + OvsTunnelFilterUninitialize(driverObject); } return status; @@ -546,16 +552,16 @@ VOID NTAPI OvsTunnelProviderBfeCallback(PVOID context, FWPM_SERVICE_STATE bfeState) { - HANDLE handle = NULL; + HANDLE engineSession = NULL; DBG_UNREFERENCED_PARAMETER(context); if (FWPM_SERVICE_RUNNING == bfeState) { - OvsTunnelEngineOpen(&handle); - if (handle) { - OvsTunnelAddSystemProvider(handle); + OvsTunnelEngineOpen(&engineSession); + if (engineSession) { + OvsTunnelAddSystemProvider(engineSession); } - OvsTunnelEngineClose(&handle); + OvsTunnelEngineClose(&engineSession); } } @@ -599,16 +605,16 @@ VOID OvsRegisterSystemProvider(PVOID deviceObject) { NTSTATUS status = STATUS_SUCCESS; - HANDLE handle = NULL; + HANDLE engineSession = NULL; status = OvsSubscribeTunnelProviderBfeStateChanges(deviceObject); if (NT_SUCCESS(status)) { if (FWPM_SERVICE_RUNNING == FwpmBfeStateGet()) { - OvsTunnelEngineOpen(&handle); - if (handle) { - OvsTunnelAddSystemProvider(handle); + OvsTunnelEngineOpen(&engineSession); + if (engineSession) { + OvsTunnelAddSystemProvider(engineSession); } - OvsTunnelEngineClose(&handle); + OvsTunnelEngineClose(&engineSession); OvsUnsubscribeTunnelProviderBfeStateChanges(); } @@ -617,13 +623,13 @@ OvsRegisterSystemProvider(PVOID deviceObject) VOID OvsUnregisterSystemProvider() { - HANDLE handle = NULL; + HANDLE engineSession = NULL; - OvsTunnelEngineOpen(&handle); - if (handle) { - OvsTunnelRemoveSystemProvider(handle); + 
OvsTunnelEngineOpen(&engineSession); + if (engineSession) { + OvsTunnelRemoveSystemProvider(engineSession); } - OvsTunnelEngineClose(&handle); + OvsTunnelEngineClose(&engineSession); OvsUnsubscribeTunnelProviderBfeStateChanges(); } @@ -711,3 +717,566 @@ VOID OvsUninitTunnelFilter(PDRIVER_OBJECT driverObject) OvsTunnelFilterUninitialize(driverObject); OvsUnsubscribeTunnelInitBfeStateChanges(); } + +NTSTATUS +OvsTunnelAddFilterEx(HANDLE engineSession, + UINT32 filterPort, + UINT64 *filterID) +{ + NTSTATUS status = STATUS_SUCCESS; + + status = OvsTunnelAddFilter(engineSession, + L"Datagram-Data OVS Filter (Inbound)", + L"address/port for UDP", + (USHORT)filterPort, + FWP_DIRECTION_INBOUND, + 0, + NULL, + &FWPM_LAYER_DATAGRAM_DATA_V4, + &OVS_TUNNEL_CALLOUT_V4, + filterID); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to add tunnel filter for port: %d, status: %x.", + filterPort, status); + } else { + OVS_LOG_INFO("Filter added, filter port: %d, filter ID: %d.", + filterPort, *filterID); + } + + return status; +} + +NTSTATUS +OvsTunnelRemoveFilterEx(HANDLE engineSession, + UINT64 filterID) +{ + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN error = TRUE; + + do { + if (filterID == 0) { + OVS_LOG_INFO("No tunnel filter to remove."); + break; + } + + status = FwpmFilterDeleteById(engineSession, filterID); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to remove tunnel with filter ID: %d,\ + status: %x.", filterID, status); + break; + } + OVS_LOG_INFO("Filter removed, filter ID: %d.", + filterID); + + error = FALSE; + } while (error); + + return status; +} + +NTSTATUS +OvsTunnelFilterExecuteAction(HANDLE engineSession, + POVS_TUNFLT_REQUEST request) +{ + NTSTATUS status = STATUS_SUCCESS; + + switch (request->operation) + { + case OVS_TUN_FILTER_CREATE: + status = OvsTunnelAddFilterEx(engineSession, + request->port, + request->filterID.addID); + break; + case OVS_TUN_FILTER_DELETE: + status = OvsTunnelRemoveFilterEx(engineSession, + request->filterID.delID); + 
break; + default: + status = STATUS_NOT_SUPPORTED; + break; + } + + return status; +} + +VOID +OvsTunnelFilterRequestPopList(POVS_TUNFLT_REQUEST_LIST listRequests, + PLIST_ENTRY head, + UINT32 *count) +{ + NdisAcquireSpinLock(&listRequests->spinlock); + + if (!IsListEmpty(&listRequests->head)) { + PLIST_ENTRY PrevEntry; + PLIST_ENTRY NextEntry; + + NextEntry = listRequests->head.Flink; + PrevEntry = listRequests->head.Blink; + + head->Flink = NextEntry; + NextEntry->Blink = head; + + head->Blink = PrevEntry; + PrevEntry->Flink = head; + + *count = listRequests->numEntries; + + InitializeListHead(&listRequests->head); + listRequests->numEntries = 0; + } + + NdisReleaseSpinLock(&listRequests->spinlock); +} + +VOID +OvsTunnelFilterRequestPush(POVS_TUNFLT_REQUEST_LIST listRequests, + POVS_TUNFLT_REQUEST request) +{ + NdisAcquireSpinLock(&listRequests->spinlock); + + InsertTailList(&listRequests->head, &(request->entry)); + listRequests->numEntries++; + + NdisReleaseSpinLock(&listRequests->spinlock); +} + +VOID +OvsTunnelFilterThreadPush(POVS_TUNFLT_REQUEST request) +{ + UINT32 threadIndex; + + threadIndex = request->port % OVS_TUNFLT_MAX_THREADS; + + OvsTunnelFilterRequestPush( + &gTunnelThreadCtx[threadIndex].listRequests, + request); + + KeSetEvent(&gTunnelThreadCtx[threadIndex].requestEvent, + IO_NO_INCREMENT, + FALSE); +} + +VOID +OvsTunnelFilterCompleteRequest(PIRP irp, + PFNTunnelVportPendingOp callback, + PVOID context, + NTSTATUS status) +{ + UINT32 replyLen = 0; + + if (callback) { + callback(context, status, &replyLen); + /* Release the context passed to the callback function. 
*/ + OvsFreeMemory(context); + } + + if (irp) { + OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status); + } +} + +VOID +OvsTunnelFilterRequestListProcess(POVS_TUNFLT_THREAD_CONTEXT threadCtx) +{ + POVS_TUNFLT_REQUEST request = NULL; + PLIST_ENTRY link = NULL; + PLIST_ENTRY next = NULL; + LIST_ENTRY head; + NTSTATUS status = STATUS_SUCCESS; + UINT32 count = 0; + BOOLEAN inTransaction = FALSE; + BOOLEAN error = TRUE; + + do + { + if (!InterlockedCompareExchange( + (LONG volatile *)&threadCtx->listRequests.numEntries, 0, 0)) { + OVS_LOG_INFO("Nothing to do... request list is empty."); + break; + } + + status = FwpmTransactionBegin(threadCtx->engineSession, 0); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to start transaction, status: %x.", + status); + break; + } + inTransaction = TRUE; + + InitializeListHead(&head); + OvsTunnelFilterRequestPopList(&threadCtx->listRequests, &head, &count); + + LIST_FORALL_SAFE(&head, link, next) { + request = CONTAINING_RECORD(link, OVS_TUNFLT_REQUEST, entry); + + status = OvsTunnelFilterExecuteAction(threadCtx->engineSession, + request); + if (!NT_SUCCESS(status)) { + RemoveEntryList(&request->entry); + count--; + + /* Complete the IRP with the failure status. */ + OvsTunnelFilterCompleteRequest(request->irp, + request->callback, + request->context, + status); + OvsFreeMemory(request); + request = NULL; + } else { + error = FALSE; + } + } + + if (error) { + /* No successful requests were made, so there is no point to commit + * the transaction. 
*/ + break; + } + + status = FwpmTransactionCommit(threadCtx->engineSession); + if (!NT_SUCCESS(status)){ + OVS_LOG_ERROR("Failed to commit transaction, status: %x.", + status); + break; + } + + inTransaction = FALSE; + } while (inTransaction); + + if (inTransaction) { + FwpmTransactionAbort(threadCtx->engineSession); + OVS_LOG_ERROR("Failed to execute request, status: %x.\ + Transaction aborted.", status); + } + + /* Complete the requests successfully executed with the transaction commit + * status. */ + while (count) { + request = (POVS_TUNFLT_REQUEST)RemoveHeadList(&head); + count--; + + OvsTunnelFilterCompleteRequest(request->irp, + request->callback, + request->context, + status); + OvsFreeMemory(request); + request = NULL; + } +} + +/* + *---------------------------------------------------------------------------- + * System thread routine that handles tunnel filter create/delete requests. + *---------------------------------------------------------------------------- + */ +_Use_decl_annotations_ +VOID +OvsTunnelFilterThreadProc(PVOID context) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_TUNFLT_THREAD_CONTEXT threadCtx = (POVS_TUNFLT_THREAD_CONTEXT)context; + PKEVENT eventArray[2] = { 0 }; + ULONG count = 0; + BOOLEAN exit = FALSE; + BOOLEAN error = TRUE; + + OVS_LOG_INFO("Starting OVS Tunnel system thread %d.", + threadCtx->threadID); + + eventArray[0] = &threadCtx->stopEvent; + eventArray[1] = &threadCtx->requestEvent; + count = ARRAY_SIZE(eventArray); + + do { + status = OvsTunnelFilterThreadInit(threadCtx); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to initialize tunnel filter thread %d.", + threadCtx->threadID); + break; + } + + do { + status = KeWaitForMultipleObjects(count, + (PVOID)eventArray, + WaitAny, + Executive, + KernelMode, + FALSE, + NULL, + NULL); + switch (status) { + case STATUS_WAIT_1: + /* Start processing requests. 
*/ + OvsTunnelFilterRequestListProcess(threadCtx); + break; + default: + /* Finish processing the received requests and exit. */ + OvsTunnelFilterRequestListProcess(threadCtx); + exit = TRUE; + break; + } + } while (!exit); + + OvsTunnelFilterThreadUninit(threadCtx); + + error = FALSE; + } while (error); + + OVS_LOG_INFO("Terminating OVS Tunnel system thread %d.", + threadCtx->threadID); + + PsTerminateSystemThread(STATUS_SUCCESS); +}; + +static NTSTATUS +OvsTunnelFilterStartThreads() +{ + NTSTATUS status = STATUS_SUCCESS; + + for (UINT index = 0; index < OVS_TUNFLT_MAX_THREADS; index++) { + gTunnelThreadCtx[index].threadID = index; + + status = OvsTunnelFilterThreadStart(&gTunnelThreadCtx[index]); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to start tunnel filter thread %d.", index); + break; + } + } + + return status; +} + +static NTSTATUS +OvsTunnelFilterThreadStart(POVS_TUNFLT_THREAD_CONTEXT threadCtx) +{ + NTSTATUS status = STATUS_SUCCESS; + HANDLE threadHandle = NULL; + BOOLEAN error = TRUE; + + do { + status = PsCreateSystemThread(&threadHandle, + SYNCHRONIZE, + NULL, + NULL, + NULL, + OvsTunnelFilterThreadProc, + threadCtx); + if (!NT_SUCCESS(status)) { + OVS_LOG_ERROR("Failed to create tunnel thread, status: %x.", + status); + break; + } + + ObReferenceObjectByHandle(threadHandle, + SYNCHRONIZE, + NULL, + KernelMode, + &threadCtx->threadObject, + NULL); + ZwClose(threadHandle); + threadHandle = NULL; + + error = FALSE; + } while (error); + + return status; +} + +static VOID +OvsTunnelFilterStopThreads() +{ + /* Signal all threads to stop and ignore all subsequent requests. */ + for (UINT index = 0; index < OVS_TUNFLT_MAX_THREADS; index++) { + OvsTunnelFilterThreadStop(&gTunnelThreadCtx[index], TRUE); + } + + /* Wait for all threads to finish processing the requests. 
*/ + for (UINT index = 0; index < OVS_TUNFLT_MAX_THREADS; index++) { + OvsTunnelFilterThreadStop(&gTunnelThreadCtx[index], FALSE); + } +} + +static VOID +OvsTunnelFilterThreadStop(POVS_TUNFLT_THREAD_CONTEXT threadCtx, + BOOLEAN signalEvent) +{ + if (signalEvent) { + /* Signal stop thread event. */ + OVS_LOG_INFO("Received stop event for OVS Tunnel system thread %d.", + threadCtx->threadID); + KeSetEvent(&threadCtx->stopEvent, IO_NO_INCREMENT, FALSE); + } else { + /* Wait for the tunnel thread to finish. */ + KeWaitForSingleObject(threadCtx->threadObject, + Executive, + KernelMode, + FALSE, + NULL); + + ObDereferenceObject(threadCtx->threadObject); + } +} + +static NTSTATUS +OvsTunnelFilterThreadInit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) +{ + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN error = TRUE; + + do { + /* Create thread's engine session object. */ + status = OvsTunnelEngineOpen(&threadCtx->engineSession); + if (!NT_SUCCESS(status)) { + break; + } + + NdisAllocateSpinLock(&threadCtx->listRequests.spinlock); + + InitializeListHead(&threadCtx->listRequests.head); + + KeInitializeEvent(&threadCtx->stopEvent, + NotificationEvent, + FALSE); + + KeInitializeEvent(&threadCtx->requestEvent, + SynchronizationEvent, + FALSE); + + error = FALSE; + } while (error); + + return status; +} + +static VOID +OvsTunnelFilterThreadUninit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) +{ + if (threadCtx->engineSession) { + /* Close thread's FWPM session. */ + OvsTunnelEngineClose(&threadCtx->engineSession); + + NdisFreeSpinLock(&threadCtx->listRequests.spinlock); + } +} + +NTSTATUS +OvsTunnelFilterQueueRequest(PIRP irp, + UINT16 remotePort, + UINT64 *filterID, + OVS_TUNFLT_OPERATION operation, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext) +{ + POVS_TUNFLT_REQUEST request = NULL; + NTSTATUS status = STATUS_PENDING; + BOOLEAN error = TRUE; + UINT64 timeout = 0; + + do { + /* Verify if the stop event was signaled. 
*/ + if (STATUS_SUCCESS == KeWaitForSingleObject( + &gTunnelThreadCtx[0].stopEvent, + Executive, + KernelMode, + FALSE, + (LARGE_INTEGER *)&timeout)) { + /* The stop event is signaled. Completed the IRP with + * STATUS_CANCELLED. */ + status = STATUS_CANCELLED; + break; + } + + if (NULL == filterID) { + OVS_LOG_ERROR("Invalid request."); + status = STATUS_INVALID_PARAMETER; + break; + } + + request = (POVS_TUNFLT_REQUEST) OvsAllocateMemory(sizeof(*request)); + if (NULL == request) { + OVS_LOG_ERROR("Failed to allocate list item."); + status = STATUS_INSUFFICIENT_RESOURCES; + break; + } + + request->port = remotePort; + request->operation = operation; + switch (operation) { + case OVS_TUN_FILTER_CREATE: + request->filterID.addID = filterID; + break; + case OVS_TUN_FILTER_DELETE: + request->filterID.delID = *filterID; + break; + } + request->irp = irp; + request->callback = callback; + request->context = tunnelContext; + + OvsTunnelFilterThreadPush(request); + + error = FALSE; + } while (error); + + if (error) { + OvsTunnelFilterCompleteRequest(irp, callback, tunnelContext, status); + if (request) { + OvsFreeMemory(request); + request = NULL; + } + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * This function adds a new WFP filter for the received port and returns the + * ID of the created WFP filter. + * + * Note: + * All necessary calls to the WFP filtering engine must be running at IRQL = + * PASSIVE_LEVEL. Because the function is called at IRQL = DISPATCH_LEVEL, + * we register an OVS_TUN_FILTER_CREATE request that will be processed by + * the tunnel filter thread routine at IRQL = PASSIVE_LEVEL. 
+ * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunelFilterCreate(PIRP irp, + UINT16 filterPort, + UINT64 *filterID, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext) +{ + return OvsTunnelFilterQueueRequest(irp, + filterPort, + filterID, + OVS_TUN_FILTER_CREATE, + callback, + tunnelContext); +} + +/* + * -------------------------------------------------------------------------- + * This function removes a WFP filter using the received filter ID. + * + * Note: + * All necessary calls to the WFP filtering engine must be running at IRQL = + * PASSIVE_LEVEL. Because the function is called at IRQL = DISPATCH_LEVEL, + * we register an OVS_TUN_FILTER_DELETE request that will be processed by + * the tunnel filter thread routine at IRQL = PASSIVE_LEVEL. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunelFilterDelete(PIRP irp, + UINT64 filterID, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext) +{ + return OvsTunnelFilterQueueRequest(irp, + 0, + &filterID, + OVS_TUN_FILTER_DELETE, + callback, + tunnelContext); +} diff --git a/datapath-windows/ovsext/TunnelIntf.h b/datapath-windows/ovsext/TunnelIntf.h index 82a51459a..6a2c980c5 100644 --- a/datapath-windows/ovsext/TunnelIntf.h +++ b/datapath-windows/ovsext/TunnelIntf.h @@ -17,6 +17,10 @@ #ifndef __TUNNEL_INTF_H_ #define __TUNNEL_INTF_H_ 1 +typedef VOID(*PFNTunnelVportPendingOp)(PVOID context, + NTSTATUS status, + UINT32 *replyLen); + /* Tunnel callout driver load/unload functions */ NTSTATUS OvsInitTunnelFilter(PDRIVER_OBJECT driverObject, PVOID deviceObject); @@ -26,4 +30,15 @@ VOID OvsRegisterSystemProvider(PVOID deviceObject); VOID OvsUnregisterSystemProvider(); +NTSTATUS OvsTunelFilterCreate(PIRP irp, + UINT16 filterPort, + UINT64 *filterID, + PFNTunnelVportPendingOp callback, + PVOID context); + +NTSTATUS OvsTunelFilterDelete(PIRP irp, + UINT64 filterID, + PFNTunnelVportPendingOp callback, + PVOID 
context); + #endif /* __TUNNEL_INTF_H_ */ diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 1423ace65..66f918906 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -47,6 +47,20 @@ #define OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC 100 +/* Context structure used to pass back and forth information to the tunnel + * filter threads. */ +typedef struct _OVS_TUNFLT_INIT_CONTEXT { + POVS_SWITCH_CONTEXT switchContext; + UINT32 outputLength; + PVOID outputBuffer; + PVOID inputBuffer; + POVS_VPORT_ENTRY vport; + BOOLEAN hvSwitchPort; + BOOLEAN hvDelete; + BOOLEAN ovsDelete; +} OVS_TUNFLT_INIT_CONTEXT, *POVS_TUNFLT_INIT_CONTEXT; + + extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static VOID OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, @@ -69,6 +83,18 @@ static POVS_VPORT_ENTRY OvsFindVportByHvNameW(POVS_SWITCH_CONTEXT switchContext, static NDIS_STATUS InitHvVportCommon(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport, BOOLEAN newPort); +static VOID OvsCleanupVportCommon(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + BOOLEAN hvSwitchPort, + BOOLEAN hvDelete, + BOOLEAN ovsDelete); +static VOID OvsTunnelVportPendingInit(PVOID context, + NTSTATUS status, + UINT32 *replyLen); +static VOID OvsTunnelVportPendingUninit(PVOID context, + NTSTATUS status, + UINT32 *replyLen); + /* * Functions implemented in relaton to NDIS port manipulation. @@ -246,7 +272,7 @@ HvDeletePort(POVS_SWITCH_CONTEXT switchContext, * delete will delete the vport. 
*/ if (vport) { - OvsRemoveAndDeleteVport(switchContext, vport, TRUE, FALSE, NULL); + OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, FALSE); } else { OVS_LOG_WARN("Vport not present."); } @@ -534,13 +560,14 @@ HvDeleteNic(POVS_SWITCH_CONTEXT switchContext, goto done; } + vport->nicState = NdisSwitchNicStateUnknown; + vport->ovsState = OVS_STATE_PORT_CREATED; + portNo = vport->portNo; if (vport->portType == NdisSwitchPortTypeExternal && vport->nicIndex != 0) { - OvsRemoveAndDeleteVport(switchContext, vport, TRUE, FALSE, NULL); + OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, FALSE); } - vport->nicState = NdisSwitchNicStateUnknown; - vport->ovsState = OVS_STATE_PORT_CREATED; NdisReleaseRWLock(switchContext->dispatchLock, &lockState); /* XXX if portNo != INVALID or always? */ @@ -850,11 +877,14 @@ OvsInitPhysNicVport(POVS_VPORT_ENTRY physExtVport, * -------------------------------------------------------------------------- */ NTSTATUS -OvsInitTunnelVport(POVS_VPORT_ENTRY vport, +OvsInitTunnelVport(PVOID userContext, + POVS_VPORT_ENTRY vport, OVS_VPORT_TYPE ovsType, UINT16 dstPort) { NTSTATUS status = STATUS_SUCCESS; + POVS_USER_PARAMS_CONTEXT usrParamsCtx = + (POVS_USER_PARAMS_CONTEXT)userContext; vport->isBridgeInternal = FALSE; vport->ovsType = ovsType; @@ -865,8 +895,26 @@ OvsInitTunnelVport(POVS_VPORT_ENTRY vport, case OVS_VPORT_TYPE_GRE64: break; case OVS_VPORT_TYPE_VXLAN: - status = OvsInitVxlanTunnel(vport, dstPort); + { + POVS_TUNFLT_INIT_CONTEXT tunnelContext = NULL; + + tunnelContext = OvsAllocateMemory(sizeof(*tunnelContext)); + if (tunnelContext == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + break; + } + tunnelContext->inputBuffer = usrParamsCtx->inputBuffer; + tunnelContext->outputBuffer = usrParamsCtx->outputBuffer; + tunnelContext->outputLength = usrParamsCtx->outputLength; + tunnelContext->vport = vport; + + status = OvsInitVxlanTunnel(usrParamsCtx->irp, + vport, + dstPort, + OvsTunnelVportPendingInit, + 
(PVOID)tunnelContext); break; + } default: ASSERT(0); } @@ -1012,7 +1060,6 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, switch(vport->ovsType) { case OVS_VPORT_TYPE_VXLAN: - ASSERT(switchContext->vxlanVport == NULL); switchContext->vxlanVport = vport; switchContext->numNonHvVports++; break; @@ -1043,70 +1090,15 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, return STATUS_SUCCESS; } - -/* - * -------------------------------------------------------------------------- - * Provides functionality that is partly complementatry to - * InitOvsVportCommon()/InitHvVportCommon(). - * - * 'hvDelete' indicates if caller is removing the vport as a result of the - * port being removed on the Hyper-V switch. - * 'ovsDelete' indicates if caller is removing the vport as a result of the - * port being removed from OVS userspace. - * -------------------------------------------------------------------------- - */ -VOID -OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport, - BOOLEAN hvDelete, - BOOLEAN ovsDelete, - BOOLEAN *vportDeallocated) +static VOID +OvsCleanupVportCommon(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + BOOLEAN hvSwitchPort, + BOOLEAN hvDelete, + BOOLEAN ovsDelete) { - BOOLEAN hvSwitchPort = FALSE; - BOOLEAN deletedOnOvs = FALSE, deletedOnHv = FALSE; - - if (vportDeallocated) { - *vportDeallocated = FALSE; - } - - if (vport->isExternal) { - if (vport->nicIndex == 0) { - ASSERT(switchContext->numPhysicalNics == 0); - switchContext->virtualExternalPortId = 0; - switchContext->virtualExternalVport = NULL; - OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); - if (vportDeallocated) { - *vportDeallocated = TRUE; - } - return; - } else { - ASSERT(switchContext->numPhysicalNics); - switchContext->numPhysicalNics--; - hvSwitchPort = TRUE; - } - } - - switch (vport->ovsType) { - case OVS_VPORT_TYPE_INTERNAL: - if (!vport->isBridgeInternal) { - switchContext->internalPortId = 0; - switchContext->internalVport 
= NULL; - OvsInternalAdapterDown(); - hvSwitchPort = TRUE; - } - break; - case OVS_VPORT_TYPE_VXLAN: - OvsCleanupVxlanTunnel(vport); - switchContext->vxlanVport = NULL; - break; - case OVS_VPORT_TYPE_GRE: - case OVS_VPORT_TYPE_GRE64: - break; - case OVS_VPORT_TYPE_NETDEV: - hvSwitchPort = TRUE; - default: - break; - } + BOOLEAN deletedOnOvs = FALSE; + BOOLEAN deletedOnHv = FALSE; /* * 'hvDelete' == TRUE indicates that the port should be removed from the @@ -1149,14 +1141,111 @@ OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, if (deletedOnHv && deletedOnOvs) { if (hvSwitchPort) { switchContext->numHvVports--; - } else { + } + else { switchContext->numNonHvVports--; } OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); - if (vportDeallocated) { - *vportDeallocated = TRUE; + } +} + +/* + * -------------------------------------------------------------------------- + * Provides functionality that is partly complementary to + * InitOvsVportCommon()/InitHvVportCommon(). + * + * 'hvDelete' indicates if caller is removing the vport as a result of the + * port being removed on the Hyper-V switch. + * 'ovsDelete' indicates if caller is removing the vport as a result of the + * port being removed from OVS userspace. 
+ * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsRemoveAndDeleteVport(PVOID usrParamsContext, + POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + BOOLEAN hvDelete, + BOOLEAN ovsDelete) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_USER_PARAMS_CONTEXT usrParamsCtx = + (POVS_USER_PARAMS_CONTEXT)usrParamsContext; + BOOLEAN hvSwitchPort = FALSE; + + if (vport->isExternal) { + if (vport->nicIndex == 0) { + ASSERT(switchContext->numPhysicalNics == 0); + switchContext->virtualExternalPortId = 0; + switchContext->virtualExternalVport = NULL; + OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); + return STATUS_SUCCESS; + } else { + ASSERT(switchContext->numPhysicalNics); + switchContext->numPhysicalNics--; + hvSwitchPort = TRUE; } } + + switch (vport->ovsType) { + case OVS_VPORT_TYPE_INTERNAL: + if (!vport->isBridgeInternal) { + switchContext->internalPortId = 0; + switchContext->internalVport = NULL; + OvsInternalAdapterDown(); + hvSwitchPort = TRUE; + } + break; + case OVS_VPORT_TYPE_VXLAN: + { + POVS_TUNFLT_INIT_CONTEXT tunnelContext = NULL; + PIRP irp = NULL; + + tunnelContext = OvsAllocateMemory(sizeof(*tunnelContext)); + if (tunnelContext == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + break; + } + RtlZeroMemory(tunnelContext, sizeof(*tunnelContext)); + + tunnelContext->switchContext = switchContext; + tunnelContext->hvSwitchPort = hvSwitchPort; + tunnelContext->hvDelete = hvDelete; + tunnelContext->ovsDelete = ovsDelete; + tunnelContext->vport = vport; + + if (usrParamsCtx) { + tunnelContext->inputBuffer = usrParamsCtx->inputBuffer; + tunnelContext->outputBuffer = usrParamsCtx->outputBuffer; + tunnelContext->outputLength = usrParamsCtx->outputLength; + irp = usrParamsCtx->irp; + } + + status = OvsCleanupVxlanTunnel(irp, + vport, + OvsTunnelVportPendingUninit, + tunnelContext); + + switchContext->vxlanVport = NULL; + break; + } + case OVS_VPORT_TYPE_GRE: + case OVS_VPORT_TYPE_GRE64: + break; + case 
OVS_VPORT_TYPE_NETDEV: + hvSwitchPort = TRUE; + default: + break; + } + + if (STATUS_SUCCESS == status) { + OvsCleanupVportCommon(switchContext, + vport, + hvSwitchPort, + hvDelete, + ovsDelete); + } + + return status; } NDIS_STATUS @@ -1294,7 +1383,7 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) LIST_FORALL_SAFE(head, link, next) { POVS_VPORT_ENTRY vport; vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, portIdLink); - OvsRemoveAndDeleteVport(switchContext, vport, TRUE, TRUE, NULL); + OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, TRUE); } } /* @@ -1302,9 +1391,8 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) * 'portIdHashArray'. */ if (switchContext->virtualExternalVport) { - OvsRemoveAndDeleteVport(switchContext, - (POVS_VPORT_ENTRY)switchContext->virtualExternalVport, TRUE, TRUE, - NULL); + OvsRemoveAndDeleteVport(NULL, switchContext, + (POVS_VPORT_ENTRY)switchContext->virtualExternalVport, TRUE, TRUE); } for (UINT hash = 0; hash < OVS_MAX_VPORT_ARRAY_SIZE; hash++) { @@ -1317,7 +1405,7 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) ASSERT(OvsIsTunnelVportType(vport->ovsType) || (vport->ovsType == OVS_VPORT_TYPE_INTERNAL && vport->isBridgeInternal) || vport->isPresentOnHv == TRUE); - OvsRemoveAndDeleteVport(switchContext, vport, TRUE, TRUE, NULL); + OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, TRUE); } } @@ -1895,7 +1983,7 @@ Cleanup: /* * -------------------------------------------------------------------------- - * Command Handler for 'OVS_VPORT_CMD_NEW'. + * Command Handler for 'OVS_VPORT_CMD_GET'. * * The function handles the initial call to setup the dump state, as well as * subsequent calls to continue dumping data. 
@@ -2020,8 +2108,6 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, } else { ASSERT(OvsIsTunnelVportType(portType) || (portType == OVS_VPORT_TYPE_INTERNAL && isBridgeInternal)); - ASSERT(OvsGetTunnelVport(gOvsSwitchContext, portType) == NULL || - !OvsIsTunnelVportType(portType)); vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); if (vport == NULL) { @@ -2031,11 +2117,23 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, vportAllocated = TRUE; if (OvsIsTunnelVportType(portType)) { - status = OvsInitTunnelVport(vport, portType, VXLAN_UDP_PORT); + UINT16 udpPortDest = VXLAN_UDP_PORT; + PNL_ATTR attr = NlAttrFindNested(vportAttrs[OVS_VPORT_ATTR_OPTIONS], + OVS_TUNNEL_ATTR_DST_PORT); + if (attr) { + udpPortDest = NlAttrGetU16(attr); + } + + status = OvsInitTunnelVport(usrParamsCtx, + vport, + portType, + udpPortDest); + nlError = NlMapStatusToNlErr(status); } else { OvsInitBridgeInternalVport(vport); } + vportInitialized = TRUE; if (nlError == NL_ERROR_SUCCESS) { @@ -2047,6 +2145,8 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, * corresponding hyper-v switch part. */ vport->isPresentOnHv = TRUE; + } else { + goto Cleanup; } } @@ -2106,14 +2206,14 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, Cleanup: NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - if (nlError != NL_ERROR_SUCCESS) { + if ((nlError != NL_ERROR_SUCCESS) && (nlError != NL_ERROR_PENDING)) { POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR) usrParamsCtx->outputBuffer; if (vport && vportAllocated == TRUE) { if (vportInitialized == TRUE) { if (OvsIsTunnelVportType(portType)) { - OvsCleanupVxlanTunnel(vport); + OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); } } OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); @@ -2123,7 +2223,7 @@ Cleanup: *replyLen = msgError->nlMsg.nlmsgLen; } - return STATUS_SUCCESS; + return (status == STATUS_PENDING) ? 
STATUS_PENDING : STATUS_SUCCESS; } @@ -2297,18 +2397,25 @@ OvsDeleteVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, usrParamsCtx->outputLength, gOvsSwitchContext->dpNo); + *replyLen = msgOut->nlMsg.nlmsgLen; + /* * Mark the port as deleted from OVS userspace. If the port does not exist * on the Hyper-V switch, it gets deallocated. Otherwise, it stays. */ - OvsRemoveAndDeleteVport(gOvsSwitchContext, vport, FALSE, TRUE, NULL); - - *replyLen = msgOut->nlMsg.nlmsgLen; + status = OvsRemoveAndDeleteVport(usrParamsCtx, + gOvsSwitchContext, + vport, + FALSE, + TRUE); + if (status) { + nlError = NlMapStatusToNlErr(status); + } Cleanup: NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - if (nlError != NL_ERROR_SUCCESS) { + if ((nlError != NL_ERROR_SUCCESS) && (nlError != NL_ERROR_PENDING)) { POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR) usrParamsCtx->outputBuffer; @@ -2316,5 +2423,173 @@ Cleanup: *replyLen = msgError->nlMsg.nlmsgLen; } - return STATUS_SUCCESS; + return (status == STATUS_PENDING) ? STATUS_PENDING : STATUS_SUCCESS; +} + +static VOID +OvsTunnelVportPendingUninit(PVOID context, + NTSTATUS status, + UINT32 *replyLen) +{ + POVS_TUNFLT_INIT_CONTEXT tunnelContext = + (POVS_TUNFLT_INIT_CONTEXT) context; + POVS_SWITCH_CONTEXT switchContext = tunnelContext->switchContext; + POVS_VPORT_ENTRY vport = tunnelContext->vport; + POVS_MESSAGE msgIn = (POVS_MESSAGE)tunnelContext->inputBuffer; + POVS_MESSAGE msgOut = (POVS_MESSAGE)tunnelContext->outputBuffer; + NL_ERROR nlError = NlMapStatusToNlErr(status); + LOCK_STATE_EX lockState; + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + + if (msgIn && msgOut) { + /* Check the received status to reply to the caller. 
*/ + if (STATUS_SUCCESS == status) { + OvsCreateMsgFromVport(vport, + msgIn, + msgOut, + tunnelContext->outputLength, + switchContext->dpNo); + + *replyLen = msgOut->nlMsg.nlmsgLen; + } else { + POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)msgOut; + + NlBuildErrorMsg(msgIn, msgError, nlError); + *replyLen = msgError->nlMsg.nlmsgLen; + } + } + + OvsCleanupVportCommon(switchContext, + vport, + tunnelContext->hvSwitchPort, + tunnelContext->hvDelete, + tunnelContext->ovsDelete); + + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); +} + +static VOID +OvsTunnelVportPendingInit(PVOID context, + NTSTATUS status, + UINT32 *replyLen) +{ + POVS_TUNFLT_INIT_CONTEXT tunnelContext = + (POVS_TUNFLT_INIT_CONTEXT) context; + POVS_VPORT_ENTRY vport = tunnelContext->vport; + POVS_MESSAGE msgIn = (POVS_MESSAGE)tunnelContext->inputBuffer; + POVS_MESSAGE msgOut = (POVS_MESSAGE)tunnelContext->outputBuffer; + PCHAR portName; + ULONG portNameLen = 0; + UINT32 portType = 0; + NL_ERROR nlError = NL_ERROR_SUCCESS; + BOOLEAN error = TRUE; + + do { + if (!NT_SUCCESS(status)) { + nlError = NlMapStatusToNlErr(status); + break; + } + + static const NL_POLICY ovsVportPolicy[] = { + [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32, .optional = TRUE }, + [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32, .optional = FALSE }, + [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .maxLen = IFNAMSIZ, + .optional = FALSE }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC, + .optional = FALSE }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = TRUE }, + }; + + PNL_ATTR vportAttrs[ARRAY_SIZE(ovsVportPolicy)]; + + /* input buffer has been validated while validating write dev op. */ + ASSERT(msgIn != NULL); + + /* Output buffer has been validated while validating transact dev op. 
*/ + ASSERT(msgOut != NULL && tunnelContext->outputLength >= sizeof *msgOut); + + if (!NlAttrParse((PNL_MSG_HDR)msgIn, + NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN, + NlMsgAttrsLen((PNL_MSG_HDR)msgIn), + ovsVportPolicy, vportAttrs, ARRAY_SIZE(vportAttrs))) { + nlError = NL_ERROR_INVAL; + break; + } + + portName = NlAttrGet(vportAttrs[OVS_VPORT_ATTR_NAME]); + portNameLen = NlAttrGetSize(vportAttrs[OVS_VPORT_ATTR_NAME]); + portType = NlAttrGetU32(vportAttrs[OVS_VPORT_ATTR_TYPE]); + + if (vport->portNo != OVS_DPPORT_NUMBER_INVALID) { + nlError = NL_ERROR_EXIST; + break; + } + + vport->ovsState = OVS_STATE_CONNECTED; + vport->nicState = NdisSwitchNicStateConnected; + + /* + * Allow the vport to be deleted, because there is no + * corresponding hyper-v switch part. + */ + vport->isPresentOnHv = TRUE; + + if (vportAttrs[OVS_VPORT_ATTR_PORT_NO] != NULL) { + /* + * XXX: when we implement the limit for OVS port number to be + * MAXUINT16, we'll need to check the port number received from the + * userspace. + */ + vport->portNo = + NlAttrGetU32(vportAttrs[OVS_VPORT_ATTR_PORT_NO]); + } else { + vport->portNo = + OvsComputeVportNo(gOvsSwitchContext); + if (vport->portNo == OVS_DPPORT_NUMBER_INVALID) { + nlError = NL_ERROR_NOMEM; + break; + } + } + + /* The ovs port name must be uninitialized. */ + ASSERT(vport->ovsName[0] == '\0'); + ASSERT(portNameLen <= OVS_MAX_PORT_NAME_LENGTH); + + RtlCopyMemory(vport->ovsName, portName, portNameLen); + /* if we don't have options, then vport->portOptions will be NULL */ + vport->portOptions = vportAttrs[OVS_VPORT_ATTR_OPTIONS]; + + /* + * XXX: when we implement OVS_DP_ATTR_USER_FEATURES in datapath, + * we'll need to check the OVS_DP_F_VPORT_PIDS flag: if it is set, + * it means we have an array of pids, instead of a single pid. + * ATM we assume we have one pid only. 
+ */ + vport->upcallPid = + NlAttrGetU32(vportAttrs[OVS_VPORT_ATTR_UPCALL_PID]); + + status = InitOvsVportCommon(gOvsSwitchContext, vport); + ASSERT(status == STATUS_SUCCESS); + + OvsCreateMsgFromVport(vport, + msgIn, + msgOut, + tunnelContext->outputLength, + gOvsSwitchContext->dpNo); + + *replyLen = msgOut->nlMsg.nlmsgLen; + + error = FALSE; + } while (error); + + if (error) { + POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR) msgOut; + + OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); + OvsFreeMemory(vport); + + NlBuildErrorMsg(msgIn, msgError, nlError); + *replyLen = msgError->nlMsg.nlmsgLen; + } } diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 348fbfd7d..593805315 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -207,15 +207,15 @@ OvsIsBridgeInternalVport(POVS_VPORT_ENTRY vport) return vport->isBridgeInternal == TRUE; } -VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport, - BOOLEAN hvDelete, BOOLEAN ovsDelete, - BOOLEAN *vportDeallocated); +NTSTATUS OvsRemoveAndDeleteVport(PVOID usrParamsCtx, + POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + BOOLEAN hvDelete, BOOLEAN ovsDelete); NDIS_STATUS InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport); -NTSTATUS OvsInitTunnelVport(POVS_VPORT_ENTRY vport, OVS_VPORT_TYPE ovsType, - UINT16 dstport); +NTSTATUS OvsInitTunnelVport(PVOID usrParamsCtx, POVS_VPORT_ENTRY vport, + OVS_VPORT_TYPE ovsType, UINT16 dstport); NTSTATUS OvsInitBridgeInternalVport(POVS_VPORT_ENTRY vport); POVS_VPORT_ENTRY OvsAllocateVport(VOID); diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 8c5718506..9d4266544 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -50,14 +50,57 @@ extern POVS_SWITCH_CONTEXT gOvsSwitchContext; /* + *---------------------------------------------------------------------------- + * This function 
verifies if the VXLAN tunnel already exists, in order to + * avoid sending a duplicate request to the WFP base filtering engine. + *---------------------------------------------------------------------------- + */ +static BOOLEAN +OvsIsTunnelFilterCreated(POVS_SWITCH_CONTEXT switchContext, + UINT16 udpPortDest) +{ + for (UINT hash = 0; hash < OVS_MAX_VPORT_ARRAY_SIZE; hash++) { + PLIST_ENTRY head, link, next; + + head = &(switchContext->portNoHashArray[hash & OVS_VPORT_MASK]); + LIST_FORALL_SAFE(head, link, next) { + POVS_VPORT_ENTRY vport = NULL; + POVS_VXLAN_VPORT vxlanPort = NULL; + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, portNoLink); + vxlanPort = (POVS_VXLAN_VPORT)vport->priv; + if (vxlanPort) { + if ((udpPortDest == vxlanPort->dstPort)) { + /* The VXLAN tunnel was already created. */ + return TRUE; + } + } + } + } + + return FALSE; +} + +/* + *---------------------------------------------------------------------------- + * This function allocates and initializes the OVS_VXLAN_VPORT. The function + * also creates a WFP tunnel filter for the necessary destination port. The + * tunnel filter create request is passed to the tunnel filter threads that + * will complete the request at a later time when IRQL is lowered to + * PASSIVE_LEVEL. + * * udpDestPort: the vxlan is set as payload to a udp frame. If the destination * port of an udp frame is udpDestPort, we understand it to be vxlan. 
+ *---------------------------------------------------------------------------- */ NTSTATUS -OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, - UINT16 udpDestPort) +OvsInitVxlanTunnel(PIRP irp, + POVS_VPORT_ENTRY vport, + UINT16 udpDestPort, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext) { - POVS_VXLAN_VPORT vxlanPort; + NTSTATUS status = STATUS_SUCCESS; + POVS_VXLAN_VPORT vxlanPort = NULL; vxlanPort = OvsAllocateMemoryWithTag(sizeof (*vxlanPort), OVS_VXLAN_POOL_TAG); @@ -67,28 +110,56 @@ OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, RtlZeroMemory(vxlanPort, sizeof(*vxlanPort)); vxlanPort->dstPort = udpDestPort; - /* - * since we are installing the WFP filter before the port is created - * We need to check if it is the same number - * XXX should be removed later - */ - ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT); vport->priv = (PVOID)vxlanPort; - return STATUS_SUCCESS; + if (!OvsIsTunnelFilterCreated(gOvsSwitchContext, udpDestPort)) { + status = OvsTunelFilterCreate(irp, + udpDestPort, + &vxlanPort->filterID, + callback, + tunnelContext); + } else { + status = STATUS_OBJECT_NAME_EXISTS; + } + + return status; } - -VOID -OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport) +/* + *---------------------------------------------------------------------------- + * This function releases the OVS_VXLAN_VPORT. The function also deletes the + * WFP tunnel filter previously created. The tunnel filter delete request is + * passed to the tunnel filter threads that will complete the request at a + * later time when IRQL is lowered to PASSIVE_LEVEL. 
+ *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsCleanupVxlanTunnel(PIRP irp, + POVS_VPORT_ENTRY vport, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext) { + NTSTATUS status = STATUS_SUCCESS; + POVS_VXLAN_VPORT vxlanPort = NULL; + if (vport->ovsType != OVS_VPORT_TYPE_VXLAN || vport->priv == NULL) { - return; + return STATUS_SUCCESS; + } + + vxlanPort = (POVS_VXLAN_VPORT)vport->priv; + + if (vxlanPort->filterID != 0) { + status = OvsTunelFilterDelete(irp, + vxlanPort->filterID, + callback, + tunnelContext); } OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG); vport->priv = NULL; + + return status; } @@ -475,9 +546,6 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, break; } - /* XXX Should be tested against the dynamic port # in the VXLAN vport */ - ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); - VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, sizeof(*VxlanHeader), layers.l7Offset, diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index d84796daa..248a5dcde 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -24,6 +24,7 @@ typedef struct _OVS_VXLAN_VPORT { UINT64 outPkts; UINT64 slowInPkts; UINT64 slowOutPkts; + UINT64 filterID; /* * To be filled */ @@ -47,10 +48,16 @@ typedef struct VXLANHdr { UINT32 reserved2:8; } VXLANHdr; -NTSTATUS OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, - UINT16 udpDestPort); +NTSTATUS OvsInitVxlanTunnel(PIRP irp, + POVS_VPORT_ENTRY vport, + UINT16 udpDestPort, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext); -VOID OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport); +NTSTATUS OvsCleanupVxlanTunnel(PIRP irp, + POVS_VPORT_ENTRY vport, + PFNTunnelVportPendingOp callback, + PVOID tunnelContext); NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, OvsIPv4TunnelKey *tunnelKey); From ffde5f8f292055c36d41651967ca057d30ded877 Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Wed, 27 May 
2015 16:58:25 +0000 Subject: [PATCH 056/146] datapath-windows: Support for multiple VXLAN tunnels At the moment the OVS extension supports only one VXLAN tunnel that is cached in the extension switch context. Replaced the latter cached pointer with an array list that contains all VXLAN tunnel vports. Signed-off-by: Sorin Vinturis Reported-by: Alin Gabriel Serdean Reported-at: https://github.com/openvswitch/ovs-issues/issues/64 Acked-by: Eitan Eliahu Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/Actions.c | 13 ++++++--- datapath-windows/ovsext/Switch.c | 16 +++++++---- datapath-windows/ovsext/Switch.h | 6 +++-- datapath-windows/ovsext/Tunnel.c | 3 ++- datapath-windows/ovsext/Vport.c | 44 ++++++++++++++++++++++++++----- datapath-windows/ovsext/Vport.h | 21 ++++----------- datapath-windows/ovsext/Vxlan.c | 2 +- datapath-windows/ovsext/Vxlan.h | 2 +- 8 files changed, 70 insertions(+), 37 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index a93fe0331..79e464c80 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -184,6 +184,9 @@ OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx, } /* + * XXX: When we search for the tunnelVport we also need to specify the + * tunnelling protocol or the L4 protocol as key as well, because there are + * different protocols that can use the same destination port. * -------------------------------------------------------------------------- * OvsDetectTunnelRxPkt -- * Utility function for an RX packet to detect its tunnel type. @@ -203,16 +206,17 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, * packets only if they are at least VXLAN header size. 
*/ if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_UDP && - flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) { - tunnelVport = ovsFwdCtx->switchContext->vxlanVport; - ovsActionStats.rxVxlan++; + flowKey->ipKey.nwProto == IPPROTO_UDP) { + UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort); } // We might get tunnel packets even before the tunnel gets initialized. if (tunnelVport) { ASSERT(ovsFwdCtx->tunnelRxNic == NULL); ovsFwdCtx->tunnelRxNic = tunnelVport; + ovsActionStats.rxVxlan++; return TRUE; } @@ -1318,6 +1322,7 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); ASSERT(status == NDIS_STATUS_SUCCESS); tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); + tunKey.dst_port = key->ipKey.l4.tpDst; RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); break; diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c index 416bcc03f..f8778546c 100644 --- a/datapath-windows/ovsext/Switch.c +++ b/datapath-windows/ovsext/Switch.c @@ -367,6 +367,8 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE, OVS_SWITCH_POOL_TAG); switchContext->pidHashArray = (PLIST_ENTRY)OvsAllocateMemoryWithTag( sizeof(LIST_ENTRY) * OVS_MAX_PID_ARRAY_SIZE, OVS_SWITCH_POOL_TAG); + switchContext->tunnelVportsArray = (PLIST_ENTRY)OvsAllocateMemoryWithTag( + sizeof(LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE, OVS_SWITCH_POOL_TAG); status = OvsAllocateFlowTable(&switchContext->datapath, switchContext); if (status == NDIS_STATUS_SUCCESS) { @@ -377,7 +379,8 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) switchContext->portNoHashArray == NULL || switchContext->ovsPortNameHashArray == NULL || switchContext->portIdHashArray== NULL || - switchContext->pidHashArray == NULL) { + switchContext->pidHashArray == NULL || + 
switchContext->tunnelVportsArray == NULL) { if (switchContext->dispatchLock) { NdisFreeRWLock(switchContext->dispatchLock); } @@ -398,6 +401,10 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) OVS_SWITCH_POOL_TAG); } + if (switchContext->tunnelVportsArray) { + OvsFreeMemory(switchContext->tunnelVportsArray); + } + OvsDeleteFlowTable(&switchContext->datapath); OvsCleanupBufferPool(switchContext); @@ -407,12 +414,9 @@ OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { InitializeListHead(&switchContext->ovsPortNameHashArray[i]); - } - for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { InitializeListHead(&switchContext->portIdHashArray[i]); - } - for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { InitializeListHead(&switchContext->portNoHashArray[i]); + InitializeListHead(&switchContext->tunnelVportsArray[i]); } for (i = 0; i < OVS_MAX_PID_ARRAY_SIZE; i++) { @@ -465,6 +469,8 @@ OvsDeleteSwitchContext(POVS_SWITCH_CONTEXT switchContext) OvsFreeMemoryWithTag(switchContext->pidHashArray, OVS_SWITCH_POOL_TAG); switchContext->pidHashArray = NULL; + OvsFreeMemory(switchContext->tunnelVportsArray); + switchContext->tunnelVportsArray = NULL; OvsDeleteFlowTable(&switchContext->datapath); OvsCleanupBufferPool(switchContext); diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h index 6ec34e1f4..8e1eb5f2c 100644 --- a/datapath-windows/ovsext/Switch.h +++ b/datapath-windows/ovsext/Switch.h @@ -132,8 +132,6 @@ typedef struct _OVS_SWITCH_CONTEXT POVS_VPORT_ENTRY virtualExternalVport; // the virtual adapter vport POVS_VPORT_ENTRY internalVport; - POVS_VPORT_ENTRY vxlanVport; - /* * 'portIdHashArray' ONLY contains ports that exist on the Hyper-V switch, * namely: VIF (vNIC) ports, external port and Hyper-V internal port. @@ -148,11 +146,15 @@ typedef struct _OVS_SWITCH_CONTEXT * exist on the Hyper-V switch, and 'numNonHvVports' counts such ports in * 'portNoHashArray'. 
* + * 'tunnelVportsArray' contains tunnel ports that are added from OVS + * userspace. Currently only VXLAN tunnels are added in this list. + * * 'ovsPortNameHashArray' contains the same entries as 'portNoHashArray' but * hashed on a different key. */ PLIST_ENTRY portIdHashArray; // based on Hyper-V portId PLIST_ENTRY portNoHashArray; // based on ovs port number + PLIST_ENTRY tunnelVportsArray; // based on ovs dst port number PLIST_ENTRY ovsPortNameHashArray; // based on ovsName PLIST_ENTRY pidHashArray; // based on packet pids NDIS_SPIN_LOCK pidHashLock; // Lock for pidHash table diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index fed58f1c3..002f18024 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -285,7 +285,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; - vport = gOvsSwitchContext->vxlanVport; + vport = OvsFindTunnelVportByDstPort(gOvsSwitchContext, + htons(tunnelKey.dst_port)); if (vport == NULL){ status = STATUS_UNSUCCESSFUL; diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 66f918906..5a1b64f18 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -600,6 +600,25 @@ OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, } +POVS_VPORT_ENTRY +OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, + UINT16 dstPort) +{ + POVS_VPORT_ENTRY vport; + PLIST_ENTRY head, link; + UINT32 hash = OvsJhashBytes((const VOID *)&dstPort, sizeof(dstPort), + OVS_HASH_BASIS); + head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]); + LIST_FORALL(head, link) { + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink); + if (((POVS_VXLAN_VPORT)vport->priv)->dstPort == dstPort) { + return vport; + } + } + return NULL; +} + + POVS_VPORT_ENTRY OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, PSTR name) @@ -1048,8 +1067,8 @@ 
InitHvVportCommon(POVS_SWITCH_CONTEXT switchContext, * -------------------------------------------------------------------------- * Functionality common to any port added from OVS userspace. * - * Inserts the port into 'portIdHashArray', 'ovsPortNameHashArray' and caches - * the pointer in the 'switchContext' if needed. + * Inserts the port into 'portNoHashArray', 'ovsPortNameHashArray' and in + * 'tunnelVportsArray' if appropriate. * -------------------------------------------------------------------------- */ NDIS_STATUS @@ -1060,9 +1079,17 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, switch(vport->ovsType) { case OVS_VPORT_TYPE_VXLAN: - switchContext->vxlanVport = vport; + { + POVS_VXLAN_VPORT vxlanVport = (POVS_VXLAN_VPORT)vport->priv; + hash = OvsJhashBytes(&vxlanVport->dstPort, + sizeof(vxlanVport->dstPort), + OVS_HASH_BASIS); + InsertHeadList( + &gOvsSwitchContext->tunnelVportsArray[hash & OVS_VPORT_MASK], + &vport->tunnelVportLink); switchContext->numNonHvVports++; break; + } case OVS_VPORT_TYPE_INTERNAL: if (vport->isBridgeInternal) { switchContext->numNonHvVports++; @@ -1131,6 +1158,11 @@ OvsCleanupVportCommon(POVS_SWITCH_CONTEXT switchContext, InitializeListHead(&vport->ovsNameLink); RemoveEntryList(&vport->portNoLink); InitializeListHead(&vport->portNoLink); + if (OVS_VPORT_TYPE_VXLAN == vport->ovsType) { + RemoveEntryList(&vport->tunnelVportLink); + InitializeListHead(&vport->tunnelVportLink); + } + deletedOnOvs = TRUE; } @@ -1224,8 +1256,6 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, vport, OvsTunnelVportPendingUninit, tunnelContext); - - switchContext->vxlanVport = NULL; break; } case OVS_VPORT_TYPE_GRE: @@ -1386,6 +1416,7 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) OvsRemoveAndDeleteVport(NULL, switchContext, vport, TRUE, TRUE); } } + /* * Remove 'virtualExternalVport' as well. This port is not part of the * 'portIdHashArray'. 
@@ -1395,9 +1426,9 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) (POVS_VPORT_ENTRY)switchContext->virtualExternalVport, TRUE, TRUE); } + for (UINT hash = 0; hash < OVS_MAX_VPORT_ARRAY_SIZE; hash++) { PLIST_ENTRY head, link, next; - head = &(switchContext->portNoHashArray[hash & OVS_VPORT_MASK]); LIST_FORALL_SAFE(head, link, next) { POVS_VPORT_ENTRY vport; @@ -1411,7 +1442,6 @@ OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) ASSERT(switchContext->virtualExternalVport == NULL); ASSERT(switchContext->internalVport == NULL); - ASSERT(switchContext->vxlanVport == NULL); } diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 593805315..84ac3d3fa 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -84,6 +84,7 @@ typedef struct _OVS_VPORT_ENTRY { LIST_ENTRY ovsNameLink; LIST_ENTRY portIdLink; LIST_ENTRY portNoLink; + LIST_ENTRY tunnelVportLink; OVS_VPORT_STATE ovsState; OVS_VPORT_TYPE ovsType; @@ -135,10 +136,8 @@ typedef struct _OVS_VPORT_ENTRY { struct _OVS_SWITCH_CONTEXT; -POVS_VPORT_ENTRY -OvsFindVportByPortNo(struct _OVS_SWITCH_CONTEXT *switchContext, - UINT32 portNo); - +POVS_VPORT_ENTRY OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, + UINT32 portNo); /* "name" is null-terminated */ POVS_VPORT_ENTRY OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, PSTR name); @@ -147,6 +146,8 @@ POVS_VPORT_ENTRY OvsFindVportByHvNameA(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext, NDIS_SWITCH_PORT_ID portId, NDIS_SWITCH_NIC_INDEX index); +POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, + UINT16 dstPort); NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); @@ -180,18 +181,6 @@ OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) ovsType == OVS_VPORT_TYPE_GRE64; } 
-static __inline POVS_VPORT_ENTRY -OvsGetTunnelVport(POVS_SWITCH_CONTEXT switchContext, - OVS_VPORT_TYPE ovsType) -{ - switch(ovsType) { - case OVS_VPORT_TYPE_VXLAN: - return switchContext->vxlanVport; - default: - return NULL; - } -} - static __inline BOOLEAN OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) { diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 9d4266544..9935bdff0 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -274,7 +274,7 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, /* UDP header */ udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); udpHdr->source = htons(tunKey->flow_hash | 32768); - udpHdr->dest = VXLAN_UDP_PORT_NBO; + udpHdr->dest = htons(tunKey->dst_port); udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + sizeof *udpHdr + sizeof *vxlanHdr); udpHdr->check = 0; diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index 248a5dcde..0e2830496 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -19,7 +19,7 @@ #include "NetProto.h" typedef struct _OVS_VXLAN_VPORT { - UINT32 dstPort; + UINT16 dstPort; UINT64 inPkts; UINT64 outPkts; UINT64 slowInPkts; From 68f1df92ce8dab00bbd3bf46819a758e1d193dcf Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Wed, 27 May 2015 16:58:26 +0000 Subject: [PATCH 057/146] datapath-windows: Document OVS tunnel filter callout Signed-off-by: Sorin Vinturis Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/TunnelFilter.c | 225 ++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 4 deletions(-) diff --git a/datapath-windows/ovsext/TunnelFilter.c b/datapath-windows/ovsext/TunnelFilter.c index a47e0aa5b..08cc13f34 100644 --- a/datapath-windows/ovsext/TunnelFilter.c +++ b/datapath-windows/ovsext/TunnelFilter.c @@ -66,7 +66,7 @@ * Callout and sublayer GUIDs */ -// b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 +/* b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 */ DEFINE_GUID( 
OVS_TUNNEL_CALLOUT_V4, 0xb16b0a6e, @@ -173,12 +173,24 @@ static VOID OvsTunnelFilterThreadUninit(POVS_TUNFLT_THREAD_CONTEXT threadCtx * Callout driver global variables */ +/* Pointer to the device object that must be created before we can register our + * callout to the base filtering engine. */ static PDEVICE_OBJECT gDeviceObject = NULL; +/* Handle to an open session to the filter engine that is used for adding + * tunnel's callout. */ static HANDLE gEngineHandle = NULL; +/* A pointer to the received handle that is associated with the registration of + * the OvsTunnelProviderBfeCallback callback. */ static HANDLE gTunnelProviderBfeHandle = NULL; +/* A pointer to the received handle that is associated with the registration of + * the OvsTunnelInitBfeCallback callback. */ static HANDLE gTunnelInitBfeHandle = NULL; -static HANDLE gBfeSubscriptionHandle = NULL; +/* Runtime identifier for tunnel's callout which is retrieved at tunnel + * initialization phase when the callout is registered. This ID is then used + * for removing the callout object from the system at tunnel + * uninitialization phase. */ static UINT32 gCalloutIdV4 = 0; +/* Array used for storing tunnel thread's private data. */ static OVS_TUNFLT_THREAD_CONTEXT gTunnelThreadCtx[OVS_TUNFLT_MAX_THREADS] = { 0 }; /* @@ -548,6 +560,12 @@ Exit: return status; } +/* + * -------------------------------------------------------------------------- + * This function adds OVS system provider to the system if the BFE (Base + * Filtering Engine) is running. 
+ * -------------------------------------------------------------------------- + */ VOID NTAPI OvsTunnelProviderBfeCallback(PVOID context, FWPM_SERVICE_STATE bfeState) @@ -565,6 +583,12 @@ OvsTunnelProviderBfeCallback(PVOID context, } } +/* + * -------------------------------------------------------------------------- + * This function registers the OvsTunnelProviderBfeCallback callback that is + * called whenever there is a change to the state of base filtering engine. + * -------------------------------------------------------------------------- + */ NTSTATUS OvsSubscribeTunnelProviderBfeStateChanges(PVOID deviceObject) { @@ -585,6 +609,13 @@ OvsSubscribeTunnelProviderBfeStateChanges(PVOID deviceObject) return status; } +/* + * -------------------------------------------------------------------------- + * This function unregisters the OvsTunnelProviderBfeCallback callback that + * was previously registered by OvsSubscribeTunnelProviderBfeStateChanges + * function. + * -------------------------------------------------------------------------- + */ VOID OvsUnsubscribeTunnelProviderBfeStateChanges() { @@ -601,6 +632,32 @@ OvsUnsubscribeTunnelProviderBfeStateChanges() } } +/* + * -------------------------------------------------------------------------- + * This function registers the OVS system provider if the BFE (Base Filtering + * Engine) is running. + * Otherwise, it will register the OvsTunnelProviderBfeCallback callback. + + * Note: Before calling FwpmBfeStateGet, the callout driver must call the + * FwpmBfeStateSubscribeChanges function to register the callback function + * to be called whenever the state of the filter engine changes. 
+ * + * Register WFP system provider call hierarchy: + * + * + * + * + * --> registers OvsTunnelProviderBfeCallback callback + * + * --> if BFE is running: + * + * --> if BFE is running: + * + * + * --> unregisters OvsTunnelProviderBfeCallback callback + * + * -------------------------------------------------------------------------- + */ VOID OvsRegisterSystemProvider(PVOID deviceObject) { @@ -621,7 +678,23 @@ OvsRegisterSystemProvider(PVOID deviceObject) } } -VOID OvsUnregisterSystemProvider() +/* + * -------------------------------------------------------------------------- + * This function removes the OVS system provider and unregisters the + * OvsTunnelProviderBfeCallback callback from BFE (Base Filtering Engine). + * + * Unregister WFP system provider call hierarchy: + * + * + * + * + * + * --> unregisters OvsTunnelProviderBfeCallback callback + * + * -------------------------------------------------------------------------- + */ +VOID +OvsUnregisterSystemProvider() { HANDLE engineSession = NULL; @@ -634,6 +707,11 @@ VOID OvsUnregisterSystemProvider() OvsUnsubscribeTunnelProviderBfeStateChanges(); } +/* + * -------------------------------------------------------------------------- + * This function initializes the tunnel filter if the BFE is running. + * -------------------------------------------------------------------------- + */ VOID NTAPI OvsTunnelInitBfeCallback(PVOID context, FWPM_SERVICE_STATE bfeState) @@ -651,6 +729,12 @@ OvsTunnelInitBfeCallback(PVOID context, } } +/* + * -------------------------------------------------------------------------- + * This function registers the OvsTunnelInitBfeCallback callback that is + * called whenever there is a change to the state of base filtering engine. 
+ * -------------------------------------------------------------------------- + */ NTSTATUS OvsSubscribeTunnelInitBfeStateChanges(PDRIVER_OBJECT driverObject, PVOID deviceObject) @@ -672,6 +756,13 @@ OvsSubscribeTunnelInitBfeStateChanges(PDRIVER_OBJECT driverObject, return status; } +/* + * -------------------------------------------------------------------------- + * This function unregisters the OvsTunnelInitBfeCallback callback that + * was previously registered by OvsSubscribeTunnelInitBfeStateChanges + * function. + * -------------------------------------------------------------------------- + */ VOID OvsUnsubscribeTunnelInitBfeStateChanges() { @@ -688,6 +779,38 @@ OvsUnsubscribeTunnelInitBfeStateChanges() } } +/* + * -------------------------------------------------------------------------- + * This function initializes the OVS tunnel filter if the BFE (Base Filtering + * Engine) is running. + * Otherwise, it will register the OvsTunnelInitBfeCallback callback. + + * Note: Before calling FwpmBfeStateGet, the callout driver must call the + * FwpmBfeStateSubscribeChanges function to register the callback function + * to be called whenever the state of the filter engine changes. + * + * Initialize OVS tunnel filter call hierarchy: + * + * + * + * + * --> registers OvsTunnelInitBfeCallback callback + * + * --> if BFE is running: + * + * + * + * + * --> if BFE is running: + * + * + * + * + * + * --> unregisters OvsTunnelInitBfeCallback callback + * + * -------------------------------------------------------------------------- + */ NTSTATUS OvsInitTunnelFilter(PDRIVER_OBJECT driverObject, PVOID deviceObject) { @@ -712,6 +835,24 @@ OvsInitTunnelFilter(PDRIVER_OBJECT driverObject, PVOID deviceObject) return status; } +/* + * -------------------------------------------------------------------------- + * This function uninitializes the OVS tunnel filter and unregisters the + * OvsTunnelInitBfeCallback callback from BFE. 
+ * + * Uninitialize OVS tunnel filter call hierarchy: + * + * + * + * + * + * + * + * + * --> unregisters OvsTunnelInitBfeCallback callback + * + * -------------------------------------------------------------------------- + */ VOID OvsUninitTunnelFilter(PDRIVER_OBJECT driverObject) { OvsTunnelFilterUninitialize(driverObject); @@ -799,6 +940,13 @@ OvsTunnelFilterExecuteAction(HANDLE engineSession, return status; } +/* + * -------------------------------------------------------------------------- + * This function pops the whole request entries from the queue and returns the + * number of entries through the 'count' parameter. The operation is + * synchronized using request list spinlock. + * -------------------------------------------------------------------------- + */ VOID OvsTunnelFilterRequestPopList(POVS_TUNFLT_REQUEST_LIST listRequests, PLIST_ENTRY head, @@ -828,6 +976,12 @@ OvsTunnelFilterRequestPopList(POVS_TUNFLT_REQUEST_LIST listRequests, NdisReleaseSpinLock(&listRequests->spinlock); } +/* + * -------------------------------------------------------------------------- + * This function pushes the received request to the list while holding the + * request list spinlock. + * -------------------------------------------------------------------------- + */ VOID OvsTunnelFilterRequestPush(POVS_TUNFLT_REQUEST_LIST listRequests, POVS_TUNFLT_REQUEST request) @@ -840,6 +994,16 @@ OvsTunnelFilterRequestPush(POVS_TUNFLT_REQUEST_LIST listRequests, NdisReleaseSpinLock(&listRequests->spinlock); } +/* + * -------------------------------------------------------------------------- + * This function pushes the received request to the corresponding thread + * request queue. The arrival of the new request is signaled to the thread, + * in order to start processing it. + * + * For a uniform distribution of requests to thread queues, a thread index is + * calculated based on the received destination port. 
+ * -------------------------------------------------------------------------- + */ VOID OvsTunnelFilterThreadPush(POVS_TUNFLT_REQUEST request) { @@ -966,7 +1130,14 @@ OvsTunnelFilterRequestListProcess(POVS_TUNFLT_THREAD_CONTEXT threadCtx) /* *---------------------------------------------------------------------------- - * System thread routine that handles tunnel filter create/delete requests. + * System thread routine that processes thread's requests queue. The thread + * routine initializes thread's necessary data and waits on two events, + * requestEvent and stopEvent. Whenever a request is pushed to the thread's + * queue, the requestEvent is signaled and the thread routine starts processing + * the arrived requests. When stopEvent is signaled, all subsequent requests + * are completed with STATUS_CANCELLED, without being added to the thread's + * queue, and the routine finishes processing all existing requests from the + * queue before uninitializing the thread and exiting. *---------------------------------------------------------------------------- */ _Use_decl_annotations_ @@ -1117,6 +1288,13 @@ OvsTunnelFilterThreadStop(POVS_TUNFLT_THREAD_CONTEXT threadCtx, } } +/* + * -------------------------------------------------------------------------- + * This function initializes thread's necessary data. Each thread has its own + * session object to the BFE that is used for processing the requests from + * the thread's queue. + * -------------------------------------------------------------------------- + */ static NTSTATUS OvsTunnelFilterThreadInit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) { @@ -1148,6 +1326,12 @@ OvsTunnelFilterThreadInit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) return status; } +/* + * -------------------------------------------------------------------------- + * This function uninitializes thread's private data. Thread's engine session + * handle is closed and set to NULL. 
+ * -------------------------------------------------------------------------- + */ static VOID OvsTunnelFilterThreadUninit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) { @@ -1159,6 +1343,13 @@ OvsTunnelFilterThreadUninit(POVS_TUNFLT_THREAD_CONTEXT threadCtx) } } +/* + * -------------------------------------------------------------------------- + * This function creates a new tunnel filter request and push it to a thread + * queue. If the thread stop event is signaled, the request is completed with + * STATUS_CANCELLED without pushing it to any queue. + * -------------------------------------------------------------------------- + */ NTSTATUS OvsTunnelFilterQueueRequest(PIRP irp, UINT16 remotePort, @@ -1239,6 +1430,19 @@ OvsTunnelFilterQueueRequest(PIRP irp, * PASSIVE_LEVEL. Because the function is called at IRQL = DISPATCH_LEVEL, * we register an OVS_TUN_FILTER_CREATE request that will be processed by * the tunnel filter thread routine at IRQL = PASSIVE_LEVEL. + * + * OVS VXLAN port add call hierarchy: + * + * + * + * + * + * --> if thread STOP event is signalled: + * --> Complete request with STATUS_CANCELLED + * --> EXIT + * + * --> add the request to one of tunnel thread queues + * * -------------------------------------------------------------------------- */ NTSTATUS @@ -1265,6 +1469,19 @@ OvsTunelFilterCreate(PIRP irp, * PASSIVE_LEVEL. Because the function is called at IRQL = DISPATCH_LEVEL, * we register an OVS_TUN_FILTER_DELETE request that will be processed by * the tunnel filter thread routine at IRQL = PASSIVE_LEVEL. 
+ * + * OVS VXLAN port delete call hierarchy: + * + * + * + * + * + * --> if thread STOP event is signalled: + * --> Complete request with STATUS_CANCELLED + * --> EXIT + * + * --> add the request to one of tunnel thread queues + * * -------------------------------------------------------------------------- */ NTSTATUS From 8909c56c467881236b655e5c4f4c5caa63178cd5 Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Wed, 27 May 2015 17:08:00 +0000 Subject: [PATCH 058/146] datapath-windows: Removed memory barrier and master lock There is no need to enforce Netlink serialization on transactions sent from userspace. The access to the driver's shared resources is synchronized anyway. Thus I have removed the master lock. I also removed the memory barrier from filter dispatch routine. A memory barrier is already in place in OvsReleaseSwitchContext function, due to the use of InterlockedCompareExchange function. Signed-off-by: Sorin Vinturis Acked-by: Eitan Eliahu Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/Datapath.c | 9 --------- datapath-windows/ovsext/Datapath.h | 14 -------------- 2 files changed, 23 deletions(-) diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c index 185bfb53c..b5832de3b 100644 --- a/datapath-windows/ovsext/Datapath.c +++ b/datapath-windows/ovsext/Datapath.c @@ -726,12 +726,6 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject, goto exit; } - /* Concurrent netlink operations are not supported. */ - if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) { - status = STATUS_RESOURCE_IN_USE; - goto done; - } - /* * Validate the input/output buffer arguments depending on the type of the * operation. @@ -921,9 +915,6 @@ done: OvsReleaseSwitchContext(gOvsSwitchContext); exit: - KeMemoryBarrier(); - instance->inUse = 0; - /* Should not complete a pending IRP unless proceesing is completed. 
*/ if (status == STATUS_PENDING) { /* STATUS_PENDING is returned by the NL handler when the request is diff --git a/datapath-windows/ovsext/Datapath.h b/datapath-windows/ovsext/Datapath.h index dbc9dea58..2c61d8217 100644 --- a/datapath-windows/ovsext/Datapath.h +++ b/datapath-windows/ovsext/Datapath.h @@ -52,20 +52,6 @@ typedef struct _OVS_OPEN_INSTANCE { POVS_USER_PACKET_QUEUE packetQueue; UINT32 pid; - /* - * On platforms that support netlink natively, there's generally some form of - * serialization between concurrent calls to netlink sockets. However, OVS - * userspace guarantees that a given netlink handle is not concurrently used. - * Despite this, we do want to have some basic checks in the kernel to make - * sure that things don't break if there are concurrent calls. - * - * This is generally not an issue since kernel data structure access should - * be sychronized anyway. Only reason to have this safeguared is to protect - * the state in "state-aware" read calls which rely on previous state. This - * restriction might go away as the userspace code gets implemented. - */ - INT inUse; - struct { POVS_MESSAGE ovsMsg; /* OVS message passed during dump start. */ UINT32 index[2]; /* markers to continue dump from. One or more From 96775a1c0c646e8b860ccfbd63ff6d71e355c6b0 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 18 May 2015 10:26:14 -0700 Subject: [PATCH 059/146] ofp-actions: Improve conjunction error message. 
Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/ofp-actions.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 2240b86af..e18229da6 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -5721,8 +5721,9 @@ ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len, if (a->type == OFPACT_CONJUNCTION) { OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { if (a->type != OFPACT_CONJUNCTION) { - VLOG_WARN("when %s action is present, it must be the only " - "kind of action used", ofpact_name(a->type)); + VLOG_WARN("when conjunction action is present, it must be " + "the only kind of action used (saw '%s' action)", + ofpact_name(a->type)); return OFPERR_NXBAC_BAD_CONJUNCTION; } } From 58b11928efb7de6fc8356e05dcf17fe9851bb90f Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Thu, 28 May 2015 21:00:39 +0000 Subject: [PATCH 060/146] datapath-windows: Multiple NBLs support for ingress data path Added support for creating and handling multiple NBLs with only one NB for ingress data path. 
Signed-off-by: Sorin Vinturis Reported-by: Alessandro Pilotti Reported-at: https://github.com/openvswitch/ovs-issues/issues/2 Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/PacketIO.c | 90 +++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index ed629fdde..6847f5f97 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -44,6 +44,10 @@ extern NDIS_STRING ovsExtFriendlyNameUC; static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); +static NTSTATUS OvsCreateNewNBLsFromMultipleNBs( + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *curNbl, + PNET_BUFFER_LIST *nextNbl); __inline VOID OvsInitCompletionList(OvsCompletionList *completionList, @@ -237,6 +241,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, OvsFlowKey key; UINT64 hash; PNET_BUFFER curNb; + POVS_BUFFER_CONTEXT ctx; nextNbl = curNbl->Next; curNbl->Next = NULL; @@ -258,18 +263,36 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, } #endif /* NDIS_SUPPORT_NDIS640 */ - /* Ethernet Header is a guaranteed safe access. */ - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - if (curNb->Next != NULL) { - /* XXX: This case is not handled yet. */ + ctx = OvsInitExternalNBLContext(switchContext, curNbl, + sourcePort == switchContext->virtualExternalPortId); + if (ctx == NULL) { RtlInitUnicodeString(&filterReason, - L"Dropping NBLs with multiple NBs"); + L"Cannot allocate external NBL context."); + OvsStartNBLIngressError(switchContext, curNbl, sendCompleteFlags, &filterReason, NDIS_STATUS_RESOURCES); continue; - } else { - POVS_BUFFER_CONTEXT ctx; + } + + /* Ethernet Header is a guaranteed safe access. 
*/ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (curNb->Next != NULL) { + /* Create a NET_BUFFER_LIST for each NET_BUFFER. */ + status = OvsCreateNewNBLsFromMultipleNBs(switchContext, + &curNbl, + &nextNbl); + if (!NT_SUCCESS(status)) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate NBLs with single NB."); + + OvsStartNBLIngressError(switchContext, curNbl, + sendCompleteFlags, &filterReason, + NDIS_STATUS_RESOURCES); + continue; + } + } + { OvsFlow *flow; /* Take the DispatchLock so none of the VPORTs disconnect while @@ -280,19 +303,6 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, dispatch); - ctx = OvsInitExternalNBLContext(switchContext, curNbl, - sourcePort == switchContext->virtualExternalPortId); - if (ctx == NULL) { - RtlInitUnicodeString(&filterReason, - L"Cannot allocate external NBL context."); - - OvsStartNBLIngressError(switchContext, curNbl, - sendCompleteFlags, &filterReason, - NDIS_STATUS_RESOURCES); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - continue; - } - vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, sourceIndex); if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { @@ -485,3 +495,43 @@ OvsExtCancelSendNBL(NDIS_HANDLE filterModuleContext, /* All send requests get completed synchronously, so there is no need to * implement this callback. */ } + +static NTSTATUS +OvsCreateNewNBLsFromMultipleNBs(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *curNbl, + PNET_BUFFER_LIST *nextNbl) +{ + NTSTATUS status = STATUS_SUCCESS; + PNET_BUFFER_LIST newNbls = NULL; + PNET_BUFFER_LIST lastNbl = NULL; + PNET_BUFFER_LIST nbl = NULL; + POVS_BUFFER_CONTEXT bufContext = NULL; + BOOLEAN error = TRUE; + + do { + /* Create new NBLs from curNbl with multiple net buffers. 
*/ newNbls = OvsPartialCopyToMultipleNBLs(switchContext, + *curNbl, 0, 0, TRUE); + if (NULL == newNbls) { + OVS_LOG_ERROR("Failed to allocate NBLs with single NB."); + status = NDIS_STATUS_RESOURCES; + break; + } + + nbl = newNbls; + while (nbl) { + lastNbl = nbl; + nbl = NET_BUFFER_LIST_NEXT_NBL(nbl); + } + lastNbl->Next = *nextNbl; + *nextNbl = newNbls->Next; + *curNbl = newNbls; + (*curNbl)->Next = NULL; + + OvsCompleteNBL(switchContext, *curNbl, TRUE); + + error = FALSE; + } while (error); + + return status; +} From 6b8da9e92269f59537b9e3e39d5749b64220e04d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 May 2015 11:57:35 -0700 Subject: [PATCH 061/146] odp-util: Correctly generate wildcards when formatting nested attributes. When formatting netlink attributes if no mask is present a wildcarded attribute is synthesized for the purposes of later processing. In the case of nested attributes this must be done recursively, filling in the correct attributes at each level rather than just generating a set of zeros of the correct size. This is done already but it always uses the attribute type for the top level keys - this corresponds to nested ENCAP attributes. However, we have several levels of potentially nested attributes for tunnels that each have their own types. This uses an approach similar to the kernel where we have sets of tables for the type of each attribute linked together by pointers. This allows the mask generation function to automatically traverse the nested attributes and always get the right types. 
Signed-off-by: Jesse Gross Acked-by: Andy Zhou --- lib/odp-util.c | 175 ++++++++++++++++++++++++++++--------------------- 1 file changed, 99 insertions(+), 76 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index e6c1070f7..1f4f7f44a 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -53,6 +53,15 @@ static const char *delimiters = ", \t\r\n"; static const char *hex_chars = "0123456789abcdefABCDEF"; +struct attr_len_tbl { + int len; + const struct attr_len_tbl *next; + int next_max; +}; +#define ATTR_LEN_INVALID -1 +#define ATTR_LEN_VARIABLE -2 +#define ATTR_LEN_NESTED -3 + static int parse_odp_key_mask_attr(const char *, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); static void format_odp_key_attr(const struct nlattr *a, @@ -66,9 +75,9 @@ static void format_odp_key_attr(const struct nlattr *a, * - For an action whose argument has a fixed length, returned that * nonnegative length in bytes. * - * - For an action with a variable-length argument, returns -2. + * - For an action with a variable-length argument, returns ATTR_LEN_VARIABLE. * - * - For an invalid 'type', returns -1. */ + * - For an invalid 'type', returns ATTR_LEN_INVALID. 
*/ static int odp_action_len(uint16_t type) { @@ -78,25 +87,25 @@ odp_action_len(uint16_t type) switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t); - case OVS_ACTION_ATTR_TUNNEL_PUSH: return -2; + case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t); - case OVS_ACTION_ATTR_USERSPACE: return -2; + case OVS_ACTION_ATTR_USERSPACE: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct ovs_action_push_vlan); case OVS_ACTION_ATTR_POP_VLAN: return 0; case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls); case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); case OVS_ACTION_ATTR_RECIRC: return sizeof(uint32_t); case OVS_ACTION_ATTR_HASH: return sizeof(struct ovs_action_hash); - case OVS_ACTION_ATTR_SET: return -2; - case OVS_ACTION_ATTR_SET_MASKED: return -2; - case OVS_ACTION_ATTR_SAMPLE: return -2; + case OVS_ACTION_ATTR_SET: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: - return -1; + return ATTR_LEN_INVALID; } - return -1; + return ATTR_LEN_INVALID; } /* Returns a string form of 'attr'. 
The return value is either a statically @@ -621,7 +630,8 @@ format_odp_action(struct ds *ds, const struct nlattr *a) size_t size; expected_len = odp_action_len(nl_attr_type(a)); - if (expected_len != -2 && nl_attr_get_size(a) != expected_len) { + if (expected_len != ATTR_LEN_VARIABLE && + nl_attr_get_size(a) != expected_len) { ds_put_format(ds, "bad length %"PRIuSIZE", expected %d for: ", nl_attr_get_size(a), expected_len); format_generic_odp_action(ds, a); @@ -1210,45 +1220,65 @@ odp_actions_from_string(const char *s, const struct simap *port_names, return 0; } +static const struct attr_len_tbl ovs_vxlan_ext_attr_lens[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .len = 4 }, +}; + +static const struct attr_len_tbl ovs_tun_key_attr_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = 8 }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = 4 }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = 4 }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = 2 }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = 2 }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = ATTR_LEN_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = ATTR_LEN_NESTED, + .next = ovs_vxlan_ext_attr_lens , + .next_max = OVS_VXLAN_EXT_MAX}, +}; + +static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = ATTR_LEN_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = 4 }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = 4 }, + [OVS_KEY_ATTR_DP_HASH] = { .len = 4 }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = 4 }, + [OVS_KEY_ATTR_TUNNEL] = { .len = ATTR_LEN_NESTED, + .next = ovs_tun_key_attr_lens, + .next_max = OVS_TUNNEL_KEY_ATTR_MAX }, + [OVS_KEY_ATTR_IN_PORT] = { .len = 4 }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) 
}, + [OVS_KEY_ATTR_VLAN] = { .len = 2 }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = 2 }, + [OVS_KEY_ATTR_MPLS] = { .len = ATTR_LEN_VARIABLE }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = 2 }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, +}; + /* Returns the correct length of the payload for a flow key attribute of the - * specified 'type', -1 if 'type' is unknown, or -2 if the attribute's payload - * is variable length. */ + * specified 'type', ATTR_LEN_INVALID if 'type' is unknown, ATTR_LEN_VARIABLE + * if the attribute's payload is variable length, or ATTR_LEN_NESTED if the + * payload is a nested type. 
*/ static int -odp_flow_key_attr_len(uint16_t type) +odp_key_attr_len(const struct attr_len_tbl tbl[], int max_len, uint16_t type) { - if (type > OVS_KEY_ATTR_MAX) { - return -1; + if (type > max_len) { + return ATTR_LEN_INVALID; } - switch ((enum ovs_key_attr) type) { - case OVS_KEY_ATTR_ENCAP: return -2; - case OVS_KEY_ATTR_PRIORITY: return 4; - case OVS_KEY_ATTR_SKB_MARK: return 4; - case OVS_KEY_ATTR_DP_HASH: return 4; - case OVS_KEY_ATTR_RECIRC_ID: return 4; - case OVS_KEY_ATTR_TUNNEL: return -2; - case OVS_KEY_ATTR_IN_PORT: return 4; - case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); - case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); - case OVS_KEY_ATTR_ETHERTYPE: return 2; - case OVS_KEY_ATTR_MPLS: return -2; - case OVS_KEY_ATTR_IPV4: return sizeof(struct ovs_key_ipv4); - case OVS_KEY_ATTR_IPV6: return sizeof(struct ovs_key_ipv6); - case OVS_KEY_ATTR_TCP: return sizeof(struct ovs_key_tcp); - case OVS_KEY_ATTR_TCP_FLAGS: return 2; - case OVS_KEY_ATTR_UDP: return sizeof(struct ovs_key_udp); - case OVS_KEY_ATTR_SCTP: return sizeof(struct ovs_key_sctp); - case OVS_KEY_ATTR_ICMP: return sizeof(struct ovs_key_icmp); - case OVS_KEY_ATTR_ICMPV6: return sizeof(struct ovs_key_icmpv6); - case OVS_KEY_ATTR_ARP: return sizeof(struct ovs_key_arp); - case OVS_KEY_ATTR_ND: return sizeof(struct ovs_key_nd); - - case OVS_KEY_ATTR_UNSPEC: - case __OVS_KEY_ATTR_MAX: - return -1; - } - - return -1; + return tbl[type].len; } static void @@ -1285,28 +1315,6 @@ ovs_frag_type_to_string(enum ovs_frag_type type) } } -static int -tunnel_key_attr_len(int type) -{ - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: return 8; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: return 4; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: return 4; - case OVS_TUNNEL_KEY_ATTR_TOS: return 1; - case OVS_TUNNEL_KEY_ATTR_TTL: return 1; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; - case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; - case OVS_TUNNEL_KEY_ATTR_TP_SRC: return 2; - case OVS_TUNNEL_KEY_ATTR_TP_DST: 
return 2; - case OVS_TUNNEL_KEY_ATTR_OAM: return 0; - case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2; - case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: return -2; - case __OVS_TUNNEL_KEY_ATTR_MAX: - return -1; - } - return -1; -} - #define GENEVE_OPT(class, type) ((OVS_FORCE uint32_t)(class) << 8 | (type)) static int parse_geneve_opts(const struct nlattr *attr) @@ -1351,7 +1359,8 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) NL_NESTED_FOR_EACH(a, left, attr) { uint16_t type = nl_attr_type(a); size_t len = nl_attr_get_size(a); - int expected_len = tunnel_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_tun_key_attr_lens, + OVS_TUNNEL_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { return ODP_FIT_ERROR; @@ -1797,8 +1806,10 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, ds_put_cstr(ds, ovs_key_attr_to_string(attr, namebuf, sizeof namebuf)); { - expected_len = odp_flow_key_attr_len(nl_attr_type(a)); - if (expected_len != -2) { + expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, nl_attr_type(a)); + if (expected_len != ATTR_LEN_VARIABLE && + expected_len != ATTR_LEN_NESTED) { bool bad_key_len = nl_attr_get_size(a) != expected_len; bool bad_mask_len = ma && nl_attr_get_size(ma) != expected_len; @@ -2045,21 +2056,27 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } static struct nlattr * -generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key) +generate_all_wildcard_mask(const struct attr_len_tbl tbl[], int max, + struct ofpbuf *ofp, const struct nlattr *key) { const struct nlattr *a; unsigned int left; int type = nl_attr_type(key); int size = nl_attr_get_size(key); - if (odp_flow_key_attr_len(type) >=0) { + if (odp_key_attr_len(tbl, max, type) != ATTR_LEN_NESTED) { nl_msg_put_unspec_zero(ofp, type, size); } else { size_t nested_mask; + if (tbl[type].next) { + tbl = tbl[type].next; + max = tbl[type].next_max; + } + nested_mask = 
nl_msg_start_nested(ofp, type); NL_ATTR_FOR_EACH(a, left, key, nl_attr_get_size(key)) { - generate_all_wildcard_mask(ofp, nl_attr_get(a)); + generate_all_wildcard_mask(tbl, max, ofp, nl_attr_get(a)); } nl_msg_end_nested(ofp, nested_mask); } @@ -2132,7 +2149,9 @@ odp_flow_format(const struct nlattr *key, size_t key_len, has_ethtype_key = true; } - is_nested_attr = (odp_flow_key_attr_len(attr_type) == -2); + is_nested_attr = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, attr_type) == + ATTR_LEN_NESTED; if (mask && mask_len) { ma = nl_attr_find__(mask, mask_len, nl_attr_type(a)); @@ -2141,7 +2160,9 @@ odp_flow_format(const struct nlattr *key, size_t key_len, if (verbose || !is_wildcard || is_nested_attr) { if (is_wildcard && !ma) { - ma = generate_all_wildcard_mask(&ofp, a); + ma = generate_all_wildcard_mask(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, + &ofp, a); } if (!first_field) { ds_put_char(ds, ','); @@ -3185,7 +3206,8 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); size_t len = nl_attr_get_size(nla); - int expected_len = odp_flow_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { continue; @@ -3308,7 +3330,8 @@ parse_flow_nlattrs(const struct nlattr *key, size_t key_len, NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); size_t len = nl_attr_get_size(nla); - int expected_len = odp_flow_key_attr_len(type); + int expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, type); if (len != expected_len && expected_len >= 0) { char namebuf[OVS_KEY_ATTR_BUFSIZE]; From 65da723b40a5fdeff6c63c94758fb4121d89fe8a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 16 May 2015 22:08:20 -0700 Subject: [PATCH 062/146] odp-util: Format tunnel attributes directly from netlink. 
When we format most netlink attributes we do so from the netlink itself, iterating through each one and printing the contents out. However, for tunnels we don't do this - we first convert to the OVS userspace representation and then format that. While convenient, this isn't really ideal as the primary use of printing netlink attributes is debugging and this conversion is lossy, particularly when the attributes aren't as expected. The result is that unexpected keys are silently ignored and the level of detail on errors is minimal. This situation becomes worse when we introduce support for Geneve. The conversion to userspace format requires additional information which we might not have (ovs-dpctl) and is more complicated than other attributes so it is likely to be confusing in the event of a bug. The information from the kernel is self-describing so it's much more reliable to display it directly from the netlink. This converts tunnel attribute formatting to be more similar to other types of attributes. As a nice bonus the output becomes more compact because it doesn't print zeroed out attributes in cases where they aren't relevant and therefore not present.
Signed-off-by: Jesse Gross Acked-by: Andy Zhou --- lib/odp-util.c | 439 +++++++++++++++++++++++++++++++++++++++--------- tests/odp.at | 18 +- tests/tunnel.at | 82 ++++----- 3 files changed, 412 insertions(+), 127 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 1f4f7f44a..a39933b9f 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -69,6 +69,9 @@ static void format_odp_key_attr(const struct nlattr *a, const struct hmap *portno_names, struct ds *ds, bool verbose); +static struct nlattr *generate_all_wildcard_mask(const struct attr_len_tbl tbl[], + int max, struct ofpbuf *, + const struct nlattr *key); /* Returns one the following for the action with the given OVS_ACTION_ATTR_* * 'type': * @@ -1770,6 +1773,228 @@ format_tun_flags(struct ds *ds, const char *name, uint16_t key, } } +static bool +check_attr_len(struct ds *ds, const struct nlattr *a, const struct nlattr *ma, + const struct attr_len_tbl tbl[], int max_len, bool need_key) +{ + int expected_len; + + expected_len = odp_key_attr_len(tbl, max_len, nl_attr_type(a)); + if (expected_len != ATTR_LEN_VARIABLE && + expected_len != ATTR_LEN_NESTED) { + + bool bad_key_len = nl_attr_get_size(a) != expected_len; + bool bad_mask_len = ma && nl_attr_get_size(ma) != expected_len; + + if (bad_key_len || bad_mask_len) { + if (need_key) { + ds_put_format(ds, "key%u", nl_attr_type(a)); + } + if (bad_key_len) { + ds_put_format(ds, "(bad key length %"PRIuSIZE", expected %d)(", + nl_attr_get_size(a), expected_len); + } + format_generic_odp_key(a, ds); + if (ma) { + ds_put_char(ds, '/'); + if (bad_mask_len) { + ds_put_format(ds, "(bad mask length %"PRIuSIZE", expected %d)(", + nl_attr_get_size(ma), expected_len); + } + format_generic_odp_key(ma, ds); + } + ds_put_char(ds, ')'); + return false; + } + } + + return true; +} + +static void +format_unknown_key(struct ds *ds, const struct nlattr *a, + const struct nlattr *ma) +{ + ds_put_format(ds, "key%u(", nl_attr_type(a)); + format_generic_odp_key(a, ds); + if (ma && 
!odp_mask_attr_is_exact(ma)) { + ds_put_char(ds, '/'); + format_generic_odp_key(ma, ds); + } + ds_put_cstr(ds, "),"); +} + +static void +format_odp_tun_vxlan_opt(const struct nlattr *attr, + const struct nlattr *mask_attr, struct ds *ds, + bool verbose) +{ + unsigned int left; + const struct nlattr *a; + struct ofpbuf ofp; + + ofpbuf_init(&ofp, 100); + NL_NESTED_FOR_EACH(a, left, attr) { + uint16_t type = nl_attr_type(a); + const struct nlattr *ma = NULL; + + if (mask_attr) { + ma = nl_attr_find__(nl_attr_get(mask_attr), + nl_attr_get_size(mask_attr), type); + if (!ma) { + ma = generate_all_wildcard_mask(ovs_vxlan_ext_attr_lens, + OVS_VXLAN_EXT_MAX, + &ofp, a); + } + } + + if (!check_attr_len(ds, a, ma, ovs_vxlan_ext_attr_lens, + OVS_VXLAN_EXT_MAX, true)) { + continue; + } + + switch (type) { + case OVS_VXLAN_EXT_GBP: { + uint32_t key = nl_attr_get_u32(a); + ovs_be16 id, id_mask; + uint8_t flags, flags_mask; + + id = htons(key & 0xFFFF); + flags = (key >> 16) & 0xFF; + if (ma) { + uint32_t mask = nl_attr_get_u32(ma); + id_mask = htons(mask & 0xFFFF); + flags_mask = (mask >> 16) & 0xFF; + } + + ds_put_cstr(ds, "gbp("); + format_be16(ds, "id", id, ma ? &id_mask : NULL, verbose); + format_u8x(ds, "flags", flags, ma ? 
&flags_mask : NULL, verbose); + ds_chomp(ds, ','); + ds_put_cstr(ds, "),"); + break; + } + + default: + format_unknown_key(ds, a, ma); + } + ofpbuf_clear(&ofp); + } + + ds_chomp(ds, ','); + ofpbuf_uninit(&ofp); +} + +static void +format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, + struct ds *ds, bool verbose) +{ + unsigned int left; + const struct nlattr *a; + uint16_t flags = 0; + uint16_t mask_flags = 0; + struct ofpbuf ofp; + + ofpbuf_init(&ofp, 100); + NL_NESTED_FOR_EACH(a, left, attr) { + enum ovs_tunnel_key_attr type = nl_attr_type(a); + const struct nlattr *ma = NULL; + + if (mask_attr) { + ma = nl_attr_find__(nl_attr_get(mask_attr), + nl_attr_get_size(mask_attr), type); + if (!ma) { + ma = generate_all_wildcard_mask(ovs_tun_key_attr_lens, + OVS_TUNNEL_KEY_ATTR_MAX, + &ofp, a); + } + } + + if (!check_attr_len(ds, a, ma, ovs_tun_key_attr_lens, + OVS_TUNNEL_KEY_ATTR_MAX, true)) { + continue; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + format_be64(ds, "tun_id", nl_attr_get_be64(a), + ma ? nl_attr_get(ma) : NULL, verbose); + flags |= FLOW_TNL_F_KEY; + if (ma) { + mask_flags |= FLOW_TNL_F_KEY; + } + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + format_ipv4(ds, "src", nl_attr_get_be32(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + format_ipv4(ds, "dst", nl_attr_get_be32(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + format_u8x(ds, "tos", nl_attr_get_u8(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + format_u8u(ds, "ttl", nl_attr_get_u8(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + flags |= FLOW_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + flags |= FLOW_TNL_F_CSUM; + break; + case OVS_TUNNEL_KEY_ATTR_TP_SRC: + format_be16(ds, "tp_src", nl_attr_get_be16(a), + ma ? 
nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_TP_DST: + format_be16(ds, "tp_dst", nl_attr_get_be16(a), + ma ? nl_attr_get(ma) : NULL, verbose); + break; + case OVS_TUNNEL_KEY_ATTR_OAM: + flags |= FLOW_TNL_F_OAM; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + ds_put_cstr(ds, "vxlan("); + format_odp_tun_vxlan_opt(a, ma, ds, verbose); + ds_put_cstr(ds, "),"); + break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + /* Not really implemented yet, handle as unknown. */ + case __OVS_TUNNEL_KEY_ATTR_MAX: + default: + format_unknown_key(ds, a, ma); + } + ofpbuf_clear(&ofp); + } + + /* Flags can have a valid mask even if the attribute is not set, so + * we need to collect these separately. */ + if (mask_attr) { + NL_NESTED_FOR_EACH(a, left, mask_attr) { + switch (nl_attr_type(a)) { + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + mask_flags |= FLOW_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + mask_flags |= FLOW_TNL_F_CSUM; + break; + case OVS_TUNNEL_KEY_ATTR_OAM: + mask_flags |= FLOW_TNL_F_OAM; + break; + } + } + } + + format_tun_flags(ds, "flags", flags, mask_attr ? &mask_flags : NULL, + verbose); + ds_chomp(ds, ','); + ofpbuf_uninit(&ofp); +} + static void format_frag(struct ds *ds, const char *name, uint8_t key, const uint8_t *mask, bool verbose) @@ -1798,39 +2023,15 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, { enum ovs_key_attr attr = nl_attr_type(a); char namebuf[OVS_KEY_ATTR_BUFSIZE]; - int expected_len; bool is_exact; is_exact = ma ? 
odp_mask_attr_is_exact(ma) : true; ds_put_cstr(ds, ovs_key_attr_to_string(attr, namebuf, sizeof namebuf)); - { - expected_len = odp_key_attr_len(ovs_flow_key_attr_lens, - OVS_KEY_ATTR_MAX, nl_attr_type(a)); - if (expected_len != ATTR_LEN_VARIABLE && - expected_len != ATTR_LEN_NESTED) { - bool bad_key_len = nl_attr_get_size(a) != expected_len; - bool bad_mask_len = ma && nl_attr_get_size(ma) != expected_len; - - if (bad_key_len || bad_mask_len) { - if (bad_key_len) { - ds_put_format(ds, "(bad key length %"PRIuSIZE", expected %d)(", - nl_attr_get_size(a), expected_len); - } - format_generic_odp_key(a, ds); - if (ma) { - ds_put_char(ds, '/'); - if (bad_mask_len) { - ds_put_format(ds, "(bad mask length %"PRIuSIZE", expected %d)(", - nl_attr_get_size(ma), expected_len); - } - format_generic_odp_key(ma, ds); - } - ds_put_char(ds, ')'); - return; - } - } + if (!check_attr_len(ds, a, ma, ovs_flow_key_attr_lens, + OVS_KEY_ATTR_MAX, false)) { + return; } ds_put_char(ds, '('); @@ -1856,32 +2057,10 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } break; - case OVS_KEY_ATTR_TUNNEL: { - struct flow_tnl key, mask_; - struct flow_tnl *mask = ma ? 
&mask_ : NULL; - - if (mask) { - memset(mask, 0, sizeof *mask); - odp_tun_key_from_attr(ma, mask); - } - memset(&key, 0, sizeof key); - if (odp_tun_key_from_attr(a, &key) == ODP_FIT_ERROR) { - ds_put_format(ds, "error"); - return; - } - format_be64(ds, "tun_id", key.tun_id, MASK(mask, tun_id), verbose); - format_ipv4(ds, "src", key.ip_src, MASK(mask, ip_src), verbose); - format_ipv4(ds, "dst", key.ip_dst, MASK(mask, ip_dst), verbose); - format_u8x(ds, "tos", key.ip_tos, MASK(mask, ip_tos), verbose); - format_u8u(ds, "ttl", key.ip_ttl, MASK(mask, ip_ttl), verbose); - format_be16(ds, "tp_src", key.tp_src, MASK(mask, tp_src), verbose); - format_be16(ds, "tp_dst", key.tp_dst, MASK(mask, tp_dst), verbose); - format_be16(ds, "gbp_id", key.gbp_id, MASK(mask, gbp_id), verbose); - format_u8x(ds, "gbp_flags", key.gbp_flags, MASK(mask, gbp_flags), verbose); - format_tun_flags(ds, "flags", key.flags, MASK(mask, flags), verbose); - ds_chomp(ds, ','); + case OVS_KEY_ATTR_TUNNEL: + format_odp_tun_attr(a, ma, ds, verbose); break; - } + case OVS_KEY_ATTR_IN_PORT: if (portno_names && verbose && is_exact) { char *name = odp_portno_names_get(portno_names, @@ -2622,14 +2801,84 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) return scan_be32_bf(s, key, mask, 1, MPLS_BOS_SHIFT); } -/* ATTR is compile-time constant, so only the case with correct data type - * will be used. However, the compiler complains about the data type for - * the other cases, so we must cast to make the compiler silent. */ -#define SCAN_PUT_ATTR(BUF, ATTR, DATA) \ - if ((ATTR) == OVS_KEY_ATTR_TUNNEL) { \ - tun_key_to_attr(BUF, (const struct flow_tnl *)(void *)&(DATA)); \ - } else { \ - nl_msg_put_unspec(BUF, ATTR, &(DATA), sizeof (DATA)); \ +static int +scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) +{ + const char *s_base = s; + ovs_be16 id, id_mask; + uint8_t flags, flags_mask; + + if (!strncmp(s, "id=", 3)) { + s += 3; + s += scan_be16(s, &id, mask ? 
&id_mask : NULL); + } else if (mask) { + memset(&id_mask, 0, sizeof id_mask); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "flags=", 6)) { + s += 6; + s += scan_u8(s, &flags, mask ? &flags_mask : NULL); + } else if (mask) { + memset(&flags_mask, 0, sizeof flags_mask); + } + + if (!strncmp(s, "))", 2)) { + s += 2; + + *key = (flags << 16) | ntohs(id); + if (mask) { + *mask = (flags_mask << 16) | ntohs(id_mask); + } + + return s - s_base; + } + + return 0; +} + +static void +tun_flags_to_attr(struct ofpbuf *a, const void *data_) +{ + const uint16_t *flags = data_; + + if (*flags & FLOW_TNL_F_DONT_FRAGMENT) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT); + } + if (*flags & FLOW_TNL_F_CSUM) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM); + } + if (*flags & FLOW_TNL_F_OAM) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_OAM); + } +} + +static void +vxlan_gbp_to_attr(struct ofpbuf *a, const void *data_) +{ + const uint32_t *gbp = data_; + + if (*gbp) { + size_t vxlan_opts_ofs; + + vxlan_opts_ofs = nl_msg_start_nested(a, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + nl_msg_put_u32(a, OVS_VXLAN_EXT_GBP, *gbp); + nl_msg_end_nested(a, vxlan_opts_ofs); + } +} + +#define SCAN_PUT_ATTR(BUF, ATTR, DATA, FUNC) \ + { \ + unsigned long call_fn = (unsigned long)FUNC; \ + if (call_fn) { \ + typedef void (*fn)(struct ofpbuf *, const void *); \ + fn func = FUNC; \ + func(BUF, &(DATA)); \ + } else { \ + nl_msg_put_unspec(BUF, ATTR, &(DATA), sizeof (DATA)); \ + } \ } #define SCAN_IF(NAME) \ @@ -2693,23 +2942,60 @@ scan_mpls_bos(const char *s, ovs_be32 *key, ovs_be32 *mask) return -EINVAL; \ } -#define SCAN_PUT(ATTR) \ +/* Beginning of nested attribute. 
*/ +#define SCAN_BEGIN_NESTED(NAME, ATTR) \ + SCAN_IF(NAME); \ + size_t key_offset, mask_offset; \ + key_offset = nl_msg_start_nested(key, ATTR); \ + if (mask) { \ + mask_offset = nl_msg_start_nested(mask, ATTR); \ + } \ + do { \ + len = 0; + +#define SCAN_END_NESTED() \ + SCAN_FINISH(); \ + nl_msg_end_nested(key, key_offset); \ + if (mask) { \ + nl_msg_end_nested(mask, mask_offset); \ + } \ + return s - start; \ + } + +#define SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, FUNC) \ + if (strncmp(s, NAME, strlen(NAME)) == 0) { \ + TYPE skey, smask; \ + memset(&skey, 0, sizeof skey); \ + memset(&smask, 0xff, sizeof smask); \ + s += strlen(NAME); \ + SCAN_TYPE(SCAN_AS, &skey, &smask); \ + SCAN_PUT(ATTR, FUNC); \ + continue; \ + } + +#define SCAN_FIELD_NESTED(NAME, TYPE, SCAN_AS, ATTR) \ + SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, NULL) + +#define SCAN_FIELD_NESTED_FUNC(NAME, TYPE, SCAN_AS, FUNC) \ + SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, 0, FUNC) + +#define SCAN_PUT(ATTR, FUNC) \ if (!mask || !is_all_zeros(&smask, sizeof smask)) { \ - SCAN_PUT_ATTR(key, ATTR, skey); \ + SCAN_PUT_ATTR(key, ATTR, skey, FUNC); \ if (mask) { \ - SCAN_PUT_ATTR(mask, ATTR, smask); \ + SCAN_PUT_ATTR(mask, ATTR, smask, FUNC); \ } \ } #define SCAN_END(ATTR) \ SCAN_FINISH(); \ - SCAN_PUT(ATTR); \ + SCAN_PUT(ATTR, NULL); \ return s - start; \ } #define SCAN_END_SINGLE(ATTR) \ SCAN_FINISH_SINGLE(); \ - SCAN_PUT(ATTR); \ + SCAN_PUT(ATTR, NULL); \ return s - start; \ } @@ -2754,18 +3040,17 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, OVS_KEY_ATTR_RECIRC_ID); SCAN_SINGLE("dp_hash(", uint32_t, u32, OVS_KEY_ATTR_DP_HASH); - SCAN_BEGIN("tunnel(", struct flow_tnl) { - SCAN_FIELD("tun_id=", be64, tun_id); - SCAN_FIELD("src=", ipv4, ip_src); - SCAN_FIELD("dst=", ipv4, ip_dst); - SCAN_FIELD("tos=", u8, ip_tos); - SCAN_FIELD("ttl=", u8, ip_ttl); - SCAN_FIELD("tp_src=", be16, tp_src); - SCAN_FIELD("tp_dst=", be16, tp_dst); - SCAN_FIELD("gbp_id=", be16, gbp_id); - 
SCAN_FIELD("gbp_flags=", u8, gbp_flags); - SCAN_FIELD("flags(", tun_flags, flags); - } SCAN_END(OVS_KEY_ATTR_TUNNEL); + SCAN_BEGIN_NESTED("tunnel(", OVS_KEY_ATTR_TUNNEL) { + SCAN_FIELD_NESTED("tun_id=", ovs_be64, be64, OVS_TUNNEL_KEY_ATTR_ID); + SCAN_FIELD_NESTED("src=", ovs_be32, ipv4, OVS_TUNNEL_KEY_ATTR_IPV4_SRC); + SCAN_FIELD_NESTED("dst=", ovs_be32, ipv4, OVS_TUNNEL_KEY_ATTR_IPV4_DST); + SCAN_FIELD_NESTED("tos=", uint8_t, u8, OVS_TUNNEL_KEY_ATTR_TOS); + SCAN_FIELD_NESTED("ttl=", uint8_t, u8, OVS_TUNNEL_KEY_ATTR_TTL); + SCAN_FIELD_NESTED("tp_src=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_SRC); + SCAN_FIELD_NESTED("tp_dst=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_DST); + SCAN_FIELD_NESTED_FUNC("vxlan(gbp(", uint32_t, vxlan_gbp, vxlan_gbp_to_attr); + SCAN_FIELD_NESTED_FUNC("flags(", uint16_t, tun_flags, tun_flags_to_attr); + } SCAN_END_NESTED(); SCAN_SINGLE_PORT("in_port(", uint32_t, OVS_KEY_ATTR_IN_PORT); diff --git a/tests/odp.at b/tests/odp.at index 16a58e7d9..c3cea2de0 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -39,7 +39,7 @@ s/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/ echo echo '# Valid forms with tunnel header.' - sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(csum,key)),skb_mark(0x1234),recirc_id(0),dp_hash(0),/' odp-base.txt + sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,ttl=64,flags(csum,key)),skb_mark(0x1234),recirc_id(0),dp_hash(0),/' odp-base.txt echo echo '# Valid forms with VLAN header.' @@ -59,13 +59,13 @@ s/\(eth([[^)]]*),?\)/\1,eth_type(0x8848),mpls(label=100,tc=7,ttl=64,bos=1)/' odp echo echo '# Valid forms with tunnel and VLAN headers.' 
- sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with QOS priority, tunnel, and VLAN headers.' - sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt @@ -117,11 +117,11 @@ skb_mark(0x1234/0xfff0),in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14: echo echo '# Valid forms with tunnel header.' - sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(csum,key)),/' odp-base.txt + sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,ttl=64,vxlan(gbp(id=10\/0xff,flags=0xb)),flags(csum,key)),/' odp-base.txt echo echo '# Valid forms with tunnel header (wildcard flag).' - sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(-df+csum+key)),/' odp-base.txt + sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,ttl=64,flags(-df+csum+key)),/' odp-base.txt echo echo '# Valid forms with VLAN header.' 
@@ -138,13 +138,13 @@ s/$/)/' odp-base.txt echo echo '# Valid forms with tunnel and VLAN headers.' - sed 's/^/tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),/ + sed 's/^/tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99/0xff0,pcp=7/0xe),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with QOS priority, tunnel, and VLAN headers.' - sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),/ + sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt @@ -280,8 +280,8 @@ push_vlan(tpid=0x9100,vid=13,pcp=5) push_vlan(tpid=0x9100,vid=13,pcp=5,cfi=0) pop_vlan sample(sample=9.7%,actions(1,2,3,push_vlan(vid=1,pcp=2))) -set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,csum,key))) -set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key))) +set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(df,csum,key))) +set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key))) tnl_pop(4) tnl_push(tnl_port(4),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0x2000,proto=0x6558),key=0x1e241)),out_port(1)) tnl_push(tnl_port(4),header(size=46,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0xa000,proto=0x6558),csum=0x0,key=0x1e241)),out_port(1)) diff --git a/tests/tunnel.at 
b/tests/tunnel.at index fae2fac9f..7ff1ba4ea 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -23,15 +23,15 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl ]) dnl remote_ip -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=1.2.3.4,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=1.2.3.4,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,flags(df))),1 ]) dnl local_ip, remote_ip -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=2.2.2.2,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(src=2.2.2.2,dst=1.1.1.1,ttl=64,flags(df))),1 ]) dnl reconfigure, local_ip, remote_ip @@ -44,17 +44,17 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], 
[0], [dnl p2 2/1: (gre: csum=true, df_default=false, local_ip=2.2.2.3, remote_ip=1.1.1.1, ttl=1) p3 3/64: (gre64: remote_ip=2.2.2.2) ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,flags(df))),1 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.3,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=2.2.2.3,dst=1.1.1.1,tos=0,ttl=1,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(csum))),1 + [Datapath actions: set(tunnel(src=2.2.2.3,dst=1.1.1.1,ttl=1,flags(csum))),1 ]) dnl nonexistent tunnel -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0x0,src=5.5.5.5,dst=6.6.6.6,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=5.5.5.5,dst=6.6.6.6,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl Invalid datapath flow ovs-appctl: ovs-vswitchd: server returned an error ]) @@ -80,28 +80,28 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl ]) dnl Tunnel CE and encapsulated packet CE -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=3,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=3,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], [Megaflow: pkt_mark=0,recirc_id=0,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_ttl=64,,in_port=1,nw_ecn=3,nw_frag=no Datapath actions: 2 ]) dnl Tunnel CE and encapsulated packet ECT(1) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], [Megaflow: pkt_mark=0,recirc_id=0,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_ttl=64,,in_port=1,nw_ecn=1,nw_frag=no Datapath actions: set(ipv4(tos=0x3/0x3)),2 ]) dnl Tunnel CE and encapsulated packet ECT(2) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=2,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=2,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], [Megaflow: pkt_mark=0,recirc_id=0,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_ttl=64,,in_port=1,nw_ecn=2,nw_frag=no Datapath actions: set(ipv4(tos=0x3/0x3)),2 ]) dnl Tunnel CE and encapsulated packet Non-ECT -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], 
[Megaflow: pkt_mark=0,recirc_id=0,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_ttl=64,,in_port=1,nw_ecn=0,nw_frag=no Datapath actions: drop @@ -131,13 +131,13 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl dnl Basic AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 + [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,ttl=64,flags(df,key))),1 ]) dnl ECN AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,tos=0x1,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 + [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,tos=0x1,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -164,19 +164,19 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl dnl Basic AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x4,ttl=128,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(dst=1.1.1.1,tos=0x4,ttl=128,flags(df))),1 ]) dnl ECN AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=5,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x5,ttl=128,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(dst=1.1.1.1,tos=0x5,ttl=128,flags(df))),1 ]) dnl non-IP AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df))),1 + [Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,flags(df))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -208,10 +208,10 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl -set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x2,src=0.0.0.0,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x3,src=0.0.0.0,dst=3.3.3.3,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x5,src=0.0.0.0,dst=4.4.4.4,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 +set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x2,dst=2.2.2.2,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x3,dst=3.3.3.3,ttl=64,flags(df,key))),1,dnl 
+set(tunnel(tun_id=0x5,dst=4.4.4.4,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -238,28 +238,28 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl p3 3/1: (gre: out_key=5, remote_ip=1.1.1.1) ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x1,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x1,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl -set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 +set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x3,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x5,dst=1.1.1.1,ttl=64,flags(df,key))),1 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl -set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 +set(tunnel(tun_id=0x3,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x5,dst=1.1.1.1,ttl=64,flags(df,key))),1 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl -set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,dnl -set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 +set(tunnel(tun_id=0x5,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl +set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,flags(df,key))),1,dnl 
+set(tunnel(tun_id=0x3,dst=1.1.1.1,ttl=64,flags(df,key))),1 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0xf,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0xf,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl Invalid datapath flow ovs-appctl: ovs-vswitchd: server returned an error ]) @@ -293,22 +293,22 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl p5 5/5: (dummy) ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 3 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x3,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0x3,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl -Datapath actions: 4,3,set(tunnel(tun_id=0x3,src=0.0.0.0,dst=3.3.3.3,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,5 +Datapath actions: 4,3,set(tunnel(tun_id=0x3,dst=3.3.3.3,ttl=64,flags(df,key))),1,5 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x3,src=3.3.3.3,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x3,src=3.3.3.3,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 4,3,5 ]) -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: drop ]) @@ -407,7 +407,7 @@ in_port=5 actions=set_field:5->tun_id AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl 
ofproto/trace ovs-dummy 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], - [Datapath actions: set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=1.1.1.1,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=3.3.3.3,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=1.1.1.1,dst=4.4.4.4,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1,set(tunnel(tun_id=0x3,src=0.0.0.0,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,gbp_id=0,gbp_flags=0,flags(df,key))),1 + [Datapath actions: set(tunnel(tun_id=0x2a,dst=1.1.1.1,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,dst=3.3.3.3,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=1.1.1.1,dst=4.4.4.4,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x3,dst=2.2.2.2,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP From e7ae59f9900ede275edfffc82d3a0d0110fd826d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 May 2015 18:47:21 -0700 Subject: [PATCH 063/146] util: Library routines for printing and scanning large hex integers. Geneve options are variable length and up to 124 bytes long, which means that they can't be easily manipulated by the integer string functions like we do for other fields. This adds a few helper routines to make these operations easier. 
Signed-off-by: Jesse Gross Acked-by: Andy Zhou --- lib/dynamic-string.c | 23 +++++++++++++ lib/dynamic-string.h | 1 + lib/learn.c | 23 +++---------- lib/meta-flow.c | 13 +------ lib/util.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ lib/util.h | 3 ++ 6 files changed, 113 insertions(+), 31 deletions(-) diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c index 914af64c1..a6c8f6c76 100644 --- a/lib/dynamic-string.c +++ b/lib/dynamic-string.c @@ -361,6 +361,29 @@ ds_swap(struct ds *a, struct ds *b) *b = temp; } +void +ds_put_hex(struct ds *ds, const void *buf_, size_t size) +{ + const uint8_t *buf = buf_; + bool printed = false; + int i; + + for (i = 0; i < size; i++) { + uint8_t val = buf[i]; + if (val || printed) { + if (!printed) { + ds_put_format(ds, "0x%"PRIx8, val); + } else { + ds_put_format(ds, "%02"PRIx8, val); + } + printed = true; + } + } + if (!printed) { + ds_put_char(ds, '0'); + } +} + /* Writes the 'size' bytes in 'buf' to 'string' as hex bytes arranged 16 per * line. Numeric offsets are also included, starting at 'ofs' for the first * byte in 'buf'. If 'ascii' is true then the corresponding ASCII characters diff --git a/lib/dynamic-string.h b/lib/dynamic-string.h index dc5981ac2..95172d102 100644 --- a/lib/dynamic-string.h +++ b/lib/dynamic-string.h @@ -55,6 +55,7 @@ void ds_put_format(struct ds *, const char *, ...) 
OVS_PRINTF_FORMAT(2, 3); void ds_put_format_valist(struct ds *, const char *, va_list) OVS_PRINTF_FORMAT(2, 0); void ds_put_printable(struct ds *, const char *, size_t); +void ds_put_hex(struct ds *ds, const void *buf, size_t size); void ds_put_hex_dump(struct ds *ds, const void *buf_, size_t size, uintptr_t ofs, bool ascii); int ds_get_line(struct ds *, FILE *); diff --git a/lib/learn.c b/lib/learn.c index 99d56e601..8ff1e0a89 100644 --- a/lib/learn.c +++ b/lib/learn.c @@ -190,29 +190,14 @@ static char * OVS_WARN_UNUSED_RESULT learn_parse_load_immediate(const char *s, struct ofpact_learn_spec *spec) { const char *full_s = s; - const char *arrow = strstr(s, "->"); struct mf_subfield dst; union mf_subvalue imm; char *error; + int err; - memset(&imm, 0, sizeof imm); - if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X') && arrow) { - const char *in = arrow - 1; - uint8_t *out = imm.u8 + sizeof imm.u8 - 1; - int n = arrow - (s + 2); - int i; - - for (i = 0; i < n; i++) { - int hexit = hexit_value(in[-i]); - if (hexit < 0) { - return xasprintf("%s: bad hex digit in value", full_s); - } - out[-(i / 2)] |= i % 2 ? 
hexit << 4 : hexit; - } - s = arrow; - } else { - ovs_be64 *last_be64 = &imm.be64[ARRAY_SIZE(imm.be64) - 1]; - *last_be64 = htonll(strtoull(s, (char **) &s, 0)); + err = parse_int_string(s, imm.u8, sizeof imm.u8, (char **) &s); + if (err) { + return xasprintf("%s: bad hex digit in value", full_s); } if (strncmp(s, "->", 2)) { diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 124b5256c..757843dfb 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -2300,18 +2300,7 @@ mf_get_subfield(const struct mf_subfield *sf, const struct flow *flow) void mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s) { - int i; - - for (i = 0; i < ARRAY_SIZE(subvalue->u8); i++) { - if (subvalue->u8[i]) { - ds_put_format(s, "0x%"PRIx8, subvalue->u8[i]); - for (i++; i < ARRAY_SIZE(subvalue->u8); i++) { - ds_put_format(s, "%02"PRIx8, subvalue->u8[i]); - } - return; - } - } - ds_put_char(s, '0'); + ds_put_hex(s, subvalue->u8, sizeof subvalue->u8); } void diff --git a/lib/util.c b/lib/util.c index bcf770051..c7e2b77f5 100644 --- a/lib/util.c +++ b/lib/util.c @@ -738,6 +738,87 @@ hexits_value(const char *s, size_t n, bool *ok) return value; } +/* Parses the string in 's' as an integer in either hex or decimal format and + * puts the result right justified in the array 'valuep' that is 'field_width' + * big. If the string is in hex format, the value may be arbitrarily large; + * integers are limited to 64-bit values. (The rationale is that decimal is + * likely to represent a number and 64 bits is a reasonable maximum whereas + * hex could either be a number or a byte string.) + * + * On return 'tail' points to the first character in the string that was + * not parsed as part of the value. ERANGE is returned if the value is too + * large to fit in the given field. 
*/ +int +parse_int_string(const char *s, uint8_t *valuep, int field_width, char **tail) +{ + unsigned long long int integer; + int i; + + if (!strncmp(s, "0x", 2) || !strncmp(s, "0X", 2)) { + uint8_t *hexit_str; + int len = 0; + int val_idx; + int err = 0; + + s += 2; + hexit_str = xmalloc(field_width * 2); + + for (;;) { + uint8_t hexit; + bool ok; + + s += strspn(s, " \t\r\n"); + hexit = hexits_value(s, 1, &ok); + if (!ok) { + *tail = CONST_CAST(char *, s); + break; + } + + if (hexit != 0 || len) { + if (DIV_ROUND_UP(len + 1, 2) > field_width) { + err = ERANGE; + goto free; + } + + hexit_str[len] = hexit; + len++; + } + s++; + } + + val_idx = field_width; + for (i = len - 1; i >= 0; i -= 2) { + val_idx--; + valuep[val_idx] = hexit_str[i]; + if (i > 0) { + valuep[val_idx] += hexit_str[i - 1] << 4; + } + } + + memset(valuep, 0, val_idx); + +free: + free(hexit_str); + return err; + } + + errno = 0; + integer = strtoull(s, tail, 0); + if (errno) { + return errno; + } + + for (i = field_width - 1; i >= 0; i--) { + valuep[i] = integer; + integer >>= 8; + } + if (integer) { + return ERANGE; + } + + return 0; +} + /* Returns the current working directory as a malloc()'d string, or a null * pointer if the current working directory cannot be determined. */ char * diff --git a/lib/util.h b/lib/util.h index 276edb569..78abfd388 100644 --- a/lib/util.h +++ b/lib/util.h @@ -314,6 +314,9 @@ bool str_to_double(const char *, double *); int hexit_value(int c); uintmax_t hexits_value(const char *s, size_t n, bool *ok); +int parse_int_string(const char *s, uint8_t *valuep, int field_width, + char **tail); + const char *english_list_delimiter(size_t index, size_t total); char *get_cwd(void); From 622a0a8e764d2b1fa6dd8b87f6b282baaac2a9d9 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 18 May 2015 16:03:01 -0700 Subject: [PATCH 064/146] odp-util: Geneve netlink decoding. 
Even though userspace does not yet support Geneve options, the kernel does and there is some basic support for decoding those attributes. This adds the ability to print Geneve attributes that might potentially come from the kernel. Signed-off-by: Jesse Gross Acked-by: Andy Zhou --- lib/odp-util.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++- tests/odp.at | 4 + 2 files changed, 204 insertions(+), 3 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index a39933b9f..c3497ea26 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1753,6 +1753,23 @@ format_be16(struct ds *ds, const char *name, ovs_be16 key, } } +static void +format_be16x(struct ds *ds, const char *name, ovs_be16 key, + const ovs_be16 *mask, bool verbose) +{ + bool mask_empty = mask && !*mask; + + if (verbose || !mask_empty) { + bool mask_full = !mask || *mask == OVS_BE16_MAX; + + ds_put_format(ds, "%s=%#"PRIx16, name, ntohs(key)); + if (!mask_full) { /* Partially masked. */ + ds_put_format(ds, "/%#"PRIx16, ntohs(*mask)); + } + ds_put_char(ds, ','); + } +} + static void format_tun_flags(struct ds *ds, const char *name, uint16_t key, const uint16_t *mask, bool verbose) @@ -1885,6 +1902,73 @@ format_odp_tun_vxlan_opt(const struct nlattr *attr, ofpbuf_uninit(&ofp); } +#define MASK(PTR, FIELD) PTR ? &PTR->FIELD : NULL + +static void +format_odp_tun_geneve(const struct nlattr *attr, + const struct nlattr *mask_attr, struct ds *ds, + bool verbose) +{ + int opts_len = nl_attr_get_size(attr); + const struct geneve_opt *opt = nl_attr_get(attr); + const struct geneve_opt *mask = mask_attr ? 
+ nl_attr_get(mask_attr) : NULL; + + if (mask && nl_attr_get_size(attr) != nl_attr_get_size(mask_attr)) { + ds_put_format(ds, "value len %"PRIuSIZE" different from mask len %"PRIuSIZE, + nl_attr_get_size(attr), nl_attr_get_size(mask_attr)); + return; + } + + while (opts_len > 0) { + unsigned int len; + uint8_t data_len, data_len_mask; + + if (opts_len < sizeof *opt) { + ds_put_format(ds, "opt len %u less than minimum %"PRIuSIZE, + opts_len, sizeof *opt); + return; + } + + data_len = opt->length * 4; + if (mask) { + if (mask->length == 0x1f) { + data_len_mask = UINT8_MAX; + } else { + data_len_mask = mask->length; + } + } + len = sizeof *opt + data_len; + if (len > opts_len) { + ds_put_format(ds, "opt len %u greater than remaining %u", + len, opts_len); + return; + } + + ds_put_char(ds, '{'); + format_be16x(ds, "class", opt->opt_class, MASK(mask, opt_class), + verbose); + format_u8x(ds, "type", opt->type, MASK(mask, type), verbose); + format_u8u(ds, "len", data_len, mask ? &data_len_mask : NULL, verbose); + if (verbose || !mask || !is_all_zeros(mask + 1, data_len)) { + ds_put_hex(ds, opt + 1, data_len); + if (mask && !is_all_ones(mask + 1, data_len)) { + ds_put_char(ds, '/'); + ds_put_hex(ds, mask + 1, data_len); + } + } else { + ds_chomp(ds, ','); + } + ds_put_char(ds, '}'); + + opt += len / sizeof(*opt); + if (mask) { + mask += len / sizeof(*opt); + } + opts_len -= len; + }; +} + static void format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, struct ds *ds, bool verbose) @@ -1963,7 +2047,10 @@ format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, ds_put_cstr(ds, "),"); break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: - /* Not really implemented yet, handle as unknown. 
*/ + ds_put_cstr(ds, "geneve("); + format_odp_tun_geneve(a, ma, ds, verbose); + ds_put_cstr(ds, "),"); + break; case __OVS_TUNNEL_KEY_ATTR_MAX: default: format_unknown_key(ds, a, ma); @@ -2014,8 +2101,6 @@ format_frag(struct ds *ds, const char *name, uint8_t key, } } -#define MASK(PTR, FIELD) PTR ? &PTR->FIELD : NULL - static void format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, const struct hmap *portno_names, struct ds *ds, @@ -2839,6 +2924,107 @@ scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) return 0; } +struct geneve_scan { + uint8_t d[252]; + int len; +}; + +static int +scan_geneve(const char *s, struct geneve_scan *key, struct geneve_scan *mask) +{ + const char *s_base = s; + struct geneve_opt *opt = (struct geneve_opt *)key->d; + struct geneve_opt *opt_mask = (struct geneve_opt *)(mask ? mask->d : NULL); + int len_remain = sizeof key->d; + + while (s[0] == '{' && len_remain >= sizeof *opt) { + int data_len = 0; + + s++; + len_remain -= sizeof *opt; + + if (!strncmp(s, "class=", 6)) { + s += 6; + s += scan_be16(s, &opt->opt_class, + mask ? &opt_mask->opt_class : NULL); + } else if (mask) { + memset(&opt_mask->opt_class, 0, sizeof opt_mask->opt_class); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "type=", 5)) { + s += 5; + s += scan_u8(s, &opt->type, mask ? &opt_mask->type : NULL); + } else if (mask) { + memset(&opt_mask->type, 0, sizeof opt_mask->type); + } + + if (s[0] == ',') { + s++; + } + if (!strncmp(s, "len=", 4)) { + uint8_t opt_len, opt_len_mask; + s += 4; + s += scan_u8(s, &opt_len, mask ? 
&opt_len_mask : NULL); + + if (opt_len > 124 || opt_len % 4 || opt_len > len_remain) { + return 0; + } + opt->length = opt_len / 4; + if (mask) { + opt_mask->length = opt_len_mask; + } + data_len = opt_len; + } else if (mask) { + memset(&opt_mask->type, 0, sizeof opt_mask->type); + } + + if (s[0] == ',') { + s++; + } + if (parse_int_string(s, (uint8_t *)(opt + 1), data_len, (char **)&s)) { + return 0; + } + + if (mask) { + if (s[0] == '/') { + s++; + if (parse_int_string(s, (uint8_t *)(opt_mask + 1), + data_len, (char **)&s)) { + return 0; + } + } + opt_mask->r1 = 0; + opt_mask->r2 = 0; + opt_mask->r3 = 0; + } + + if (s[0] == '}') { + s++; + opt += 1 + data_len / 4; + if (mask) { + opt_mask += 1 + data_len / 4; + } + len_remain -= data_len; + } + } + + if (s[0] == ')') { + int len = sizeof key->d - len_remain; + + s++; + key->len = len; + if (mask) { + mask->len = len; + } + return s - s_base; + } + + return 0; +} + static void tun_flags_to_attr(struct ofpbuf *a, const void *data_) { @@ -2869,6 +3055,15 @@ vxlan_gbp_to_attr(struct ofpbuf *a, const void *data_) } } +static void +geneve_to_attr(struct ofpbuf *a, const void *data_) +{ + const struct geneve_scan *geneve = data_; + + nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, geneve->d, + geneve->len); +} + #define SCAN_PUT_ATTR(BUF, ATTR, DATA, FUNC) \ { \ unsigned long call_fn = (unsigned long)FUNC; \ @@ -3049,6 +3244,8 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, SCAN_FIELD_NESTED("tp_src=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_SRC); SCAN_FIELD_NESTED("tp_dst=", ovs_be16, be16, OVS_TUNNEL_KEY_ATTR_TP_DST); SCAN_FIELD_NESTED_FUNC("vxlan(gbp(", uint32_t, vxlan_gbp, vxlan_gbp_to_attr); + SCAN_FIELD_NESTED_FUNC("geneve(", struct geneve_scan, geneve, + geneve_to_attr); SCAN_FIELD_NESTED_FUNC("flags(", uint16_t, tun_flags, tun_flags_to_attr); } SCAN_END_NESTED(); diff --git a/tests/odp.at b/tests/odp.at index c3cea2de0..4dcf0b0fb 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ 
-123,6 +123,10 @@ skb_mark(0x1234/0xfff0),in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14: echo '# Valid forms with tunnel header (wildcard flag).' sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,ttl=64,flags(-df+csum+key)),/' odp-base.txt + echo + echo '# Valid forms with Geneve header.' + sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,ttl=64,geneve({class=0,type=0,len=4,0xa\/0xff}{class=0xffff,type=0x1,len=4,0xffffffff}),flags(csum,key)),/' odp-base.txt + echo echo '# Valid forms with VLAN header.' sed 's/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ From c05c01cd86bdb26be1c107741c3ede2659e51a29 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 May 2015 10:41:05 -0700 Subject: [PATCH 065/146] odp-util: Fix alignment when scanning Geneve attributes. Clang complains about the fact that we use a byte array to scan Geneve attributes into since there are different alignment requirements: lib/odp-util.c:2936:30: error: cast from 'uint8_t *' (aka 'unsigned char *') to 'struct geneve_opt *' increases required alignment from 1 to 2 [-Werror,-Wcast-align] struct geneve_opt *opt = (struct geneve_opt *)key->d; ^~~~~~~~~~~~~~~~~~~~~~~~~~~ We can instead treat this as an array of Geneve option headers to ensure we get the right alignment and then there is no need for casts. 
Reported-by: Joe Stringer Signed-off-by: Jesse Gross Acked-by: Joe Stringer --- lib/odp-util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index c3497ea26..3204d16ae 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2925,7 +2925,7 @@ scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) } struct geneve_scan { - uint8_t d[252]; + struct geneve_opt d[63]; int len; }; @@ -2933,8 +2933,8 @@ static int scan_geneve(const char *s, struct geneve_scan *key, struct geneve_scan *mask) { const char *s_base = s; - struct geneve_opt *opt = (struct geneve_opt *)key->d; - struct geneve_opt *opt_mask = (struct geneve_opt *)(mask ? mask->d : NULL); + struct geneve_opt *opt = key->d; + struct geneve_opt *opt_mask = mask ? mask->d : NULL; int len_remain = sizeof key->d; while (s[0] == '{' && len_remain >= sizeof *opt) { From c4e119b275208230a636d870b4d87d0f398833a8 Mon Sep 17 00:00:00 2001 From: Alin Serdean Date: Fri, 29 May 2015 21:22:54 +0000 Subject: [PATCH 066/146] datapath-windows: Fix build. Remove a variable that breaks the Windows forwarding extension build. 
The error: warning C4189: 'bufContext' : local variable is initialized but not referenced Signed-off-by: Alin Gabriel Serdean Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/PacketIO.c | 1 - 1 file changed, 1 deletion(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 6847f5f97..7b2c0c81c 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -505,7 +505,6 @@ OvsCreateNewNBLsFromMultipleNBs(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST newNbls = NULL; PNET_BUFFER_LIST lastNbl = NULL; PNET_BUFFER_LIST nbl = NULL; - POVS_BUFFER_CONTEXT bufContext = NULL; BOOLEAN error = TRUE; do { From 5aa7f168b1d891cadbf04321d5729a02f5384031 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 25 Feb 2015 08:46:02 -0800 Subject: [PATCH 067/146] json: Fix error message for corner case in json_string_unescape(). The error message should not include bytes already copied from the input string. Found by inspection. Signed-off-by: Ben Pfaff Acked-by: Alex Wang --- lib/json.c | 3 ++- tests/ovsdb-data.at | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/json.c b/lib/json.c index f004771e1..93d6438b1 100644 --- a/lib/json.c +++ b/lib/json.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2014 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -831,6 +831,7 @@ json_string_unescape(const char *in, size_t in_len, char **outp) * lexer will never pass in a string that ends in a single * backslash, but json_string_unescape() has other callers that * are not as careful.*/ + ds_clear(&out); ds_put_cstr(&out, "quoted string may not end with backslash"); goto exit; } diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at index 51d32e731..f44d6d4f2 100644 --- a/tests/ovsdb-data.at +++ b/tests/ovsdb-data.at @@ -286,6 +286,14 @@ OVSDB_CHECK_NEGATIVE([quotes must be balanced], [parse-atom-strings '[["string"]]' '"asdf'], ["asdf: missing quote at end of quoted string]) +OVSDB_CHECK_NEGATIVE([quoted string must not contain unescaped quote], + [parse-atom-strings '[["string"]]' '"as"df"'], + ["as"df": quoted string may not include unescaped "]) + +OVSDB_CHECK_NEGATIVE([quoted string must not end with backslash], + [parse-atom-strings '[["string"]]' '"asdf\"'], + ["asdf\": quoted string may not end with backslash]) + OVSDB_CHECK_NEGATIVE([uuids must be valid], [parse-atom-strings '[["uuid"]]' '1234-5678'], ["1234-5678" is not a valid UUID]) From 7a1290cf393ac9ce7b0c69b7f4de4e0176d8333e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 3 Mar 2015 13:28:32 -0800 Subject: [PATCH 068/146] ofp-errors: Add Nicira extension code for OFPBMC_BAD_FIELD. There are a couple of cases where OpenFlow 1.0 controllers that use Nicira extensions can get OFPBMC_BAD_FIELD, so we should have an error code for it in that protocol. 
Reported-by: Soner Sevinc Signed-off-by: Ben Pfaff Acked-by: Jarno Rajahalme --- AUTHORS | 1 + lib/ofp-errors.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 574fba5b8..a2b40a733 100644 --- a/AUTHORS +++ b/AUTHORS @@ -340,6 +340,7 @@ Sridhar Samudrala samudrala.sridhar@gmail.com Srini Seetharaman seethara@stanford.edu Sabyasachi Sengupta Sabyasachi.Sengupta@alcatel-lucent.com Salvatore Cambria salvatore.cambria@citrix.com +Soner Sevinc sevincs@vmware.com Stephen Hemminger shemminger@vyatta.com Suganya Ramachandran suganyar@vmware.com Takayuki HAMA t-hama@cb.jp.nec.com diff --git a/lib/ofp-errors.h b/lib/ofp-errors.h index aff207562..e6c921195 100644 --- a/lib/ofp-errors.h +++ b/lib/ofp-errors.h @@ -329,7 +329,7 @@ enum ofperr { * match. */ OFPERR_OFPBMC_BAD_WILDCARDS, - /* OF1.1+(4,6). Unsupported field in the match. */ + /* NX1.0(0,263), OF1.1+(4,6). Unsupported field in the match. */ OFPERR_OFPBMC_BAD_FIELD, /* NX1.0(1,258), OF1.1+(4,7). Unsupported value in a match From b440dd8cd4313a09f8ea4cb79da8ac8eab94b010 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 24 Feb 2015 16:40:42 -0800 Subject: [PATCH 069/146] ofproto-dpif: Refactor feature support structure. Place all of the detected datapath features into a separate structure, initialized when the dpif_backer is opened and shared with xbridges. Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- ofproto/ofproto-dpif-xlate.c | 80 +++++++++++++----------------------- ofproto/ofproto-dpif-xlate.h | 7 +--- ofproto/ofproto-dpif.c | 64 +++++++++++------------------ ofproto/ofproto-dpif.h | 25 +++++++++++ 4 files changed, 80 insertions(+), 96 deletions(-) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index f73787744..71b8beffc 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -96,21 +96,8 @@ struct xbridge { bool has_in_band; /* Bridge has in band control? */ bool forward_bpdu; /* Bridge forwards STP BPDUs? 
*/ - /* True if the datapath supports recirculation. */ - bool enable_recirc; - - /* True if the datapath supports variable-length - * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. - * False if the datapath supports only 8-byte (or shorter) userdata. */ - bool variable_length_userdata; - - /* Number of MPLS label stack entries that the datapath supports - * in matches. */ - size_t max_mpls_depth; - - /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET - * actions. */ - bool masked_set_action; + /* Datapath feature support. */ + struct dpif_backer_support support; }; struct xbundle { @@ -492,10 +479,7 @@ static void xlate_xbridge_set(struct xbridge *, struct dpif *, const struct dpif_ipfix *, const struct netflow *, bool forward_bpdu, bool has_in_band, - bool enable_recirc, - bool variable_length_userdata, - size_t max_mpls_depth, - bool masked_set_action); + const struct dpif_backer_support *); static void xlate_xbundle_set(struct xbundle *xbundle, enum port_vlan_mode vlan_mode, int vlan, unsigned long *trunks, bool use_priority_tags, @@ -563,10 +547,7 @@ xlate_xbridge_set(struct xbridge *xbridge, const struct dpif_ipfix *ipfix, const struct netflow *netflow, bool forward_bpdu, bool has_in_band, - bool enable_recirc, - bool variable_length_userdata, - size_t max_mpls_depth, - bool masked_set_action) + const struct dpif_backer_support *support) { if (xbridge->ml != ml) { mac_learning_unref(xbridge->ml); @@ -611,10 +592,7 @@ xlate_xbridge_set(struct xbridge *xbridge, xbridge->dpif = dpif; xbridge->forward_bpdu = forward_bpdu; xbridge->has_in_band = has_in_band; - xbridge->enable_recirc = enable_recirc; - xbridge->variable_length_userdata = variable_length_userdata; - xbridge->max_mpls_depth = max_mpls_depth; - xbridge->masked_set_action = masked_set_action; + xbridge->support = *support; } static void @@ -698,10 +676,8 @@ xlate_xbridge_copy(struct xbridge *xbridge) xbridge->dpif, xbridge->ml, xbridge->stp, xbridge->rstp, 
xbridge->ms, xbridge->mbridge, xbridge->sflow, xbridge->ipfix, xbridge->netflow, - xbridge->forward_bpdu, - xbridge->has_in_band, xbridge->enable_recirc, - xbridge->variable_length_userdata, - xbridge->max_mpls_depth, xbridge->masked_set_action); + xbridge->forward_bpdu, xbridge->has_in_band, + &xbridge->support); LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) { xlate_xbundle_copy(new_xbridge, xbundle); } @@ -852,9 +828,8 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, const struct dpif_sflow *sflow, const struct dpif_ipfix *ipfix, const struct netflow *netflow, - bool forward_bpdu, bool has_in_band, bool enable_recirc, - bool variable_length_userdata, size_t max_mpls_depth, - bool masked_set_action) + bool forward_bpdu, bool has_in_band, + const struct dpif_backer_support *support) { struct xbridge *xbridge; @@ -872,9 +847,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, xbridge->name = xstrdup(name); xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix, - netflow, forward_bpdu, has_in_band, enable_recirc, - variable_length_userdata, max_mpls_depth, - masked_set_action); + netflow, forward_bpdu, has_in_band, support); } static void @@ -1754,7 +1727,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, struct flow_wildcards *wc = &ctx->xout->wc; struct ofport_dpif *ofport; - if (ctx->xbridge->enable_recirc) { + if (ctx->xbridge->support.recirc) { use_recirc = bond_may_recirc( out_xbundle->bond, &xr.recirc_id, &xr.hash_basis); @@ -2953,10 +2926,11 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, } if (out_port != ODPP_NONE) { + bool use_masked = ctx->xbridge->support.masked_set_action; + ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, ctx->xout->odp_actions, - wc, - ctx->xbridge->masked_set_action); + wc, use_masked); if (xr) { struct ovs_action_hash *act_hash; @@ -3426,6 +3400,7 @@ execute_controller_action(struct xlate_ctx *ctx, int 
len, { struct ofproto_packet_in *pin; struct dp_packet *packet; + bool use_masked; ctx->xout->slow |= SLOW_CONTROLLER; if (!ctx->xin->packet) { @@ -3434,10 +3409,10 @@ execute_controller_action(struct xlate_ctx *ctx, int len, packet = dp_packet_clone(ctx->xin->packet); + use_masked = ctx->xbridge->support.masked_set_action; ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, ctx->xout->odp_actions, - &ctx->xout->wc, - ctx->xbridge->masked_set_action); + &ctx->xout->wc, use_masked); odp_execute_actions(NULL, &packet, 1, false, ctx->xout->odp_actions->data, @@ -3479,12 +3454,13 @@ static void compose_recirculate_action(struct xlate_ctx *ctx) { struct recirc_metadata md; + bool use_masked; uint32_t id; + use_masked = ctx->xbridge->support.masked_set_action; ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, ctx->xout->odp_actions, - &ctx->xout->wc, - ctx->xbridge->masked_set_action); + &ctx->xout->wc, use_masked); recirc_metadata_from_flow(&md, &ctx->xin->flow); @@ -3534,10 +3510,11 @@ compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls) n = flow_count_mpls_labels(flow, wc); if (!n) { + bool use_masked = ctx->xbridge->support.masked_set_action; + ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, ctx->xout->odp_actions, - &ctx->xout->wc, - ctx->xbridge->masked_set_action); + &ctx->xout->wc, use_masked); } else if (n >= FLOW_MAX_MPLS_LABELS) { if (ctx->xin->packet != NULL) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -3561,7 +3538,7 @@ compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type) int n = flow_count_mpls_labels(flow, wc); if (flow_pop_mpls(flow, n, eth_type, wc)) { - if (ctx->xbridge->enable_recirc) { + if (ctx->xbridge->support.recirc) { ctx->was_mpls = true; } } else if (n >= FLOW_MAX_MPLS_LABELS) { @@ -3885,8 +3862,9 @@ xlate_sample_action(struct xlate_ctx *ctx, /* Scale the probability from 16-bit to 32-bit while representing * the same 
percentage. */ uint32_t probability = (os->probability << 16) | os->probability; + bool use_masked; - if (!ctx->xbridge->variable_length_userdata) { + if (!ctx->xbridge->support.variable_length_userdata) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_ERR_RL(&rl, "ignoring NXAST_SAMPLE action because datapath " @@ -3895,10 +3873,10 @@ xlate_sample_action(struct xlate_ctx *ctx, return; } + use_masked = ctx->xbridge->support.masked_set_action; ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, ctx->xout->odp_actions, - &ctx->xout->wc, - ctx->xbridge->masked_set_action); + &ctx->xout->wc, use_masked); compose_flow_sample_cookie(os->probability, os->collector_set_id, os->obs_domain_id, os->obs_point_id, &cookie); @@ -4743,7 +4721,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) if (is_ip_any(flow)) { wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; } - if (xbridge->enable_recirc) { + if (xbridge->support.recirc) { /* Always exactly match recirc_id when datapath supports * recirculation. 
*/ wc->masks.recirc_id = UINT32_MAX; diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 6c8ade308..e39847b69 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -210,11 +210,8 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *, struct rstp *, const struct mcast_snooping *, const struct mbridge *, const struct dpif_sflow *, const struct dpif_ipfix *, const struct netflow *, - bool forward_bpdu, - bool has_in_band, bool enable_recirc, - bool variable_length_userdata, - size_t mpls_label_stack_length, - bool masked_set_action); + bool forward_bpdu, bool has_in_band, + const struct dpif_backer_support *support); void xlate_remove_ofproto(struct ofproto_dpif *); void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index b11938420..c1daa1dc1 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -271,30 +271,11 @@ struct dpif_backer { bool recv_set_enable; /* Enables or disables receiving packets. */ - /* Recirculation. */ - bool enable_recirc; /* True if the datapath supports recirculation */ - - /* True if the datapath supports unique flow identifiers */ - bool enable_ufid; - - /* True if the datapath supports variable-length - * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. - * False if the datapath supports only 8-byte (or shorter) userdata. */ - bool variable_length_userdata; - - /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET - * actions. */ - bool masked_set_action; - - /* Maximum number of MPLS label stack entries that the datapath supports - * in a match */ - size_t max_mpls_depth; - /* Version string of the datapath stored in OVSDB. */ char *dp_version_string; - /* True if the datapath supports tnl_push and pop actions. */ - bool enable_tnl_push_pop; + /* Datapath feature support. 
*/ + struct dpif_backer_support support; struct atomic_count tnl_count; }; @@ -370,19 +351,19 @@ ofproto_dpif_cast(const struct ofproto *ofproto) size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto) { - return ofproto->backer->max_mpls_depth; + return ofproto->backer->support.max_mpls_depth; } bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto) { - return ofproto->backer->enable_recirc; + return ofproto->backer->support.recirc; } bool ofproto_dpif_get_enable_ufid(struct dpif_backer *backer) { - return backer->enable_ufid; + return backer->support.ufid; } static void ofproto_trace(struct ofproto_dpif *, struct flow *, @@ -646,10 +627,7 @@ type_run(const char *type) ofproto->netflow, ofproto->up.forward_bpdu, connmgr_has_in_band(ofproto->up.connmgr), - ofproto->backer->enable_recirc, - ofproto->backer->variable_length_userdata, - ofproto->backer->max_mpls_depth, - ofproto->backer->masked_set_action); + &ofproto->backer->support); HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { xlate_bundle_set(ofproto, bundle, bundle->name, @@ -873,10 +851,7 @@ struct odp_garbage { }; static bool check_variable_length_userdata(struct dpif_backer *backer); -static size_t check_max_mpls_depth(struct dpif_backer *backer); -static bool check_recirc(struct dpif_backer *backer); -static bool check_ufid(struct dpif_backer *backer); -static bool check_masked_set_action(struct dpif_backer *backer); +static void check_support(struct dpif_backer *backer); static int open_dpif_backer(const char *type, struct dpif_backer **backerp) @@ -971,12 +946,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) shash_add(&all_dpif_backers, type, backer); - backer->enable_recirc = check_recirc(backer); - backer->max_mpls_depth = check_max_mpls_depth(backer); - backer->masked_set_action = check_masked_set_action(backer); - backer->enable_ufid = check_ufid(backer); - - backer->enable_tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif); + 
check_support(backer); atomic_count_init(&backer->tnl_count, 0); error = dpif_recv_set(backer->dpif, backer->recv_set_enable); @@ -994,7 +964,8 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) /* This check fails if performed before udpif threads have been set, * as the kernel module checks that the 'pid' in userspace action * is non-zero. */ - backer->variable_length_userdata = check_variable_length_userdata(backer); + backer->support.variable_length_userdata + = check_variable_length_userdata(backer); backer->dp_version_string = dpif_get_dp_version(backer->dpif); return error; @@ -1003,7 +974,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) bool ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto) { - return ofproto_use_tnl_push_pop && ofproto->backer->enable_tnl_push_pop && + return ofproto_use_tnl_push_pop && ofproto->backer->support.tnl_push_pop && atomic_count_get(&ofproto->backer->tnl_count); } @@ -1228,6 +1199,19 @@ check_masked_set_action(struct dpif_backer *backer) return !error; } +static void +check_support(struct dpif_backer *backer) +{ + /* This feature needs to be tested after udpif threads are set. */ + backer->support.variable_length_userdata = false; + + backer->support.recirc = check_recirc(backer); + backer->support.max_mpls_depth = check_max_mpls_depth(backer); + backer->support.masked_set_action = check_masked_set_action(backer); + backer->support.ufid = check_ufid(backer); + backer->support.tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif); +} + static int construct(struct ofproto *ofproto_) { diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 956692422..9d625a162 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -73,6 +73,31 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255); * Ofproto-dpif-xlate is responsible for translating OpenFlow actions into * datapath actions. */ +/* Stores the various features which the corresponding backer supports. 
*/ +struct dpif_backer_support { + /* True if the datapath supports variable-length + * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. + * False if the datapath supports only 8-byte (or shorter) userdata. */ + bool variable_length_userdata; + + /* Maximum number of MPLS label stack entries that the datapath supports + * in a match */ + size_t max_mpls_depth; + + /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET + * actions. */ + bool masked_set_action; + + /* True if the datapath supports recirculation. */ + bool recirc; + + /* True if the datapath supports tnl_push and pop actions. */ + bool tnl_push_pop; + + /* True if the datapath supports OVS_FLOW_ATTR_UFID. */ + bool ufid; +}; + size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *); bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *); bool ofproto_dpif_get_enable_ufid(struct dpif_backer *backer); From db8bb9a51e237a55ee5c0b8800ef954e5f84e798 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 6 Mar 2015 10:09:10 -0800 Subject: [PATCH 070/146] odp-execute: Refactor determining dpif assistance. To be more explicit about which actions require datapath assistance, split this out into a separate function. While this is fairly trivial currently, there will be more special cases for the upcoming conntrack changes. Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/odp-execute.c | 49 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/lib/odp-execute.c b/lib/odp-execute.c index f83fe6058..b7851048b 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -464,6 +464,38 @@ odp_execute_sample(void *dp, struct dp_packet *packet, bool steal, nl_attr_get_size(subactions), dp_execute_action); } +static bool +requires_datapath_assistance(const struct nlattr *a) +{ + enum ovs_action_attr type = nl_attr_type(a); + + switch (type) { + /* These only make sense in the context of a datapath. 
*/ + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_TUNNEL_PUSH: + case OVS_ACTION_ATTR_TUNNEL_POP: + case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_RECIRC: + return true; + + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_HASH: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: + return false; + + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); + } + + return false; +} + void odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal, const struct nlattr *actions, size_t actions_len, @@ -477,13 +509,7 @@ odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal, int type = nl_attr_type(a); bool last_action = (left <= NLA_ALIGN(a->nla_len)); - switch ((enum ovs_action_attr) type) { - /* These only make sense in the context of a datapath. */ - case OVS_ACTION_ATTR_OUTPUT: - case OVS_ACTION_ATTR_TUNNEL_PUSH: - case OVS_ACTION_ATTR_TUNNEL_POP: - case OVS_ACTION_ATTR_USERSPACE: - case OVS_ACTION_ATTR_RECIRC: + if (requires_datapath_assistance(a)) { if (dp_execute_action) { /* Allow 'dp_execute_action' to steal the packet data if we do * not need it any more. 
*/ @@ -497,8 +523,10 @@ odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal, return; } } - break; + continue; + } + switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_HASH: { const struct ovs_action_hash *hash_act = nl_attr_get(a); @@ -578,6 +606,11 @@ odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal, } break; + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_TUNNEL_PUSH: + case OVS_ACTION_ATTR_TUNNEL_POP: + case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); From 6f17821ed39b7cc54320295621bbe4ff88d55881 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 29 May 2015 15:15:01 -0700 Subject: [PATCH 071/146] learn: Improve error reporting. parse_int_string() returns an error if the field overflows, not if there is a bad hex digit. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- lib/learn.c | 4 ++-- tests/learn.at | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/learn.c b/lib/learn.c index 8ff1e0a89..a0e32c6eb 100644 --- a/lib/learn.c +++ b/lib/learn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -197,7 +197,7 @@ learn_parse_load_immediate(const char *s, struct ofpact_learn_spec *spec) err = parse_int_string(s, imm.u8, sizeof imm.u8, (char **) &s); if (err) { - return xasprintf("%s: bad hex digit in value", full_s); + return xasprintf("%s: too many bits in immediate value", full_s); } if (strncmp(s, "->", 2)) { diff --git a/tests/learn.at b/tests/learn.at index 322ab846b..0372e4a90 100644 --- a/tests/learn.at +++ b/tests/learn.at @@ -91,6 +91,20 @@ ovs-ofctl: actions are invalid with specified match (OFPBAC_MATCH_INCONSISTENT) ]]) AT_CLEANUP +AT_SETUP([learning action - too-long immediate value]) +dnl 129 bits is too long. +AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:0x1fedbca9876543210fedbca9876543210->NXM_NX_IPV6_DST[])']], + [1], [], [[ovs-ofctl: 0x1fedbca9876543210fedbca9876543210->NXM_NX_IPV6_DST[]: too many bits in immediate value +]]) + +dnl 128 bits is merely a bad prerequisite. +AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:0xfedbca9876543210fedbca9876543210->NXM_NX_IPV6_DST[])']], [1], [], [stderr]) +AT_CHECK([sed -e 's/.*|meta_flow|WARN|//' < stderr], [0], + [[destination field ipv6_dst lacks correct prerequisites +ovs-ofctl: actions are invalid with specified match (OFPBAC_MATCH_INCONSISTENT) +]], [[]]) +AT_CLEANUP + AT_SETUP([learning action - standard VLAN+MAC learning]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ From c005fd6147ce3ffc310aa9811352d10bf7a831cb Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 8 Apr 2015 18:05:27 -0700 Subject: [PATCH 072/146] ovsdb-test: add multiple clients to backlogged connection test Backlogged connection test tests jsonrpc monitor's ability to combine updates. Adding multiple clients to ensure that non-blocking clients will get individual updates while blocking clients will get combined updates. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- tests/ovsdb-server.at | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index c9ce4b1fd..eacca356c 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -751,6 +751,13 @@ add_ports () { add=`add_ports` AT_CAPTURE_FILE([ovsdb-client.err]) +AT_CAPTURE_FILE([ovsdb-client-nonblock.err]) + + +# Start an ovsdb-client monitoring all changes to the database, +# By default, it is non-blocking, and will get update message +# for each ovsdb-server transaactions. +AT_CHECK([ovsdb-client --detach --no-chdir --pidfile=nonblock.pid monitor ALL >ovsdb-client-nonblock.out 2>ovsdb-client-nonblock.err]) # Start an ovsdb-client monitoring all changes to the database, # make it block to force the buffers to fill up, and then execute @@ -763,8 +770,10 @@ for i in `seq 1 $n_iterations`; do done AT_CHECK([ovs-appctl -t ovsdb-client ovsdb-client/unblock]) OVS_WAIT_UNTIL([grep "\"xyzzy$counter\"" ovsdb-client.out]) +OVS_WAIT_UNTIL([grep "\"xyzzy$counter\"" ovsdb-client-nonblock.out]) AT_CHECK([ovs-appctl -t ovsdb-client exit]) OVS_WAIT_WHILE([test -e ovsdb-client.pid]) +AT_CHECK([kill `cat nonblock.pid`]) # Count the number of updates in the ovsdb-client output, by counting # the number of changes to the Open_vSwitch table. (All of our @@ -773,8 +782,11 @@ OVS_WAIT_WHILE([test -e ovsdb-client.pid]) # # Check that the counter is what we expect. 
logged_updates=`grep -c '^Open_vSwitch' ovsdb-client.out` -echo "logged_updates=$logged_updates (expected less than $n_updates)" -AT_CHECK([test $logged_updates -lt $n_updates]) +logged_nonblock_updates=`grep -c '^Open_vSwitch' ovsdb-client-nonblock.out` +echo "logged_nonblock_updates=$logged_nonblock_updates (expected less or equal to $n_updates)" +echo "logged_updates=$logged_updates (expected less than $logged_nonblock_updates)" +AT_CHECK([test $logged_nonblock_updates -le $n_updates]) +AT_CHECK([test $logged_updates -lt $logged_nonblock_updates]) AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0], ["xyzzy$counter" ]) From 897af58755f3f75c880642171239caa98530e4ad Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 16:50:41 -0700 Subject: [PATCH 073/146] jsonrpc-server: split monitors into database back end and JSON-RPC front end jsonrpc-server.c has two main functions. One deals with handling the jsonrpc connections, the other deals with monitoring the database. Currently, each jsonrpc connection has its own set of DB monitors. This can be wasteful if a number of connections share the same monitors. This patch, and a few following refactoring patches, attempt to split the jsonrpc handling front end off from the main monitoring functions within jsonrpc-server.c. This patch changes the monitoring functions and data structures from 'ovsdb_jsonrpc_monitor_xxx' into 'ovsdb_monitor_xxx'. This and the following patches move the ovsdb_monitor backend functions into their own file. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 221 +++++++++++++++++++++++------------------ 1 file changed, 124 insertions(+), 97 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 95b49958f..76755856c 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1026,9 +1026,9 @@ ovsdb_jsonrpc_trigger_complete_done(struct ovsdb_jsonrpc_session *s) } } -/* JSON-RPC database table monitors. 
*/ +/* database table monitors. */ -enum ovsdb_jsonrpc_monitor_selection { +enum ovsdb_monitor_selection { OJMS_INITIAL = 1 << 0, /* All rows when monitor is created. */ OJMS_INSERT = 1 << 1, /* New rows. */ OJMS_DELETE = 1 << 2, /* Deleted rows. */ @@ -1036,13 +1036,13 @@ enum ovsdb_jsonrpc_monitor_selection { }; /* A particular column being monitored. */ -struct ovsdb_jsonrpc_monitor_column { +struct ovsdb_monitor_column { const struct ovsdb_column *column; - enum ovsdb_jsonrpc_monitor_selection select; + enum ovsdb_monitor_selection select; }; /* A row that has changed in a monitored table. */ -struct ovsdb_jsonrpc_monitor_row { +struct ovsdb_monitor_row { struct hmap_node hmap_node; /* In ovsdb_jsonrpc_monitor_table.changes. */ struct uuid uuid; /* UUID of row that changed. */ struct ovsdb_datum *old; /* Old data, NULL for an inserted row. */ @@ -1050,38 +1050,49 @@ struct ovsdb_jsonrpc_monitor_row { }; /* A particular table being monitored. */ -struct ovsdb_jsonrpc_monitor_table { +struct ovsdb_monitor_table { const struct ovsdb_table *table; /* This is the union (bitwise-OR) of the 'select' values in all of the * members of 'columns' below. */ - enum ovsdb_jsonrpc_monitor_selection select; + enum ovsdb_monitor_selection select; /* Columns being monitored. */ - struct ovsdb_jsonrpc_monitor_column *columns; + struct ovsdb_monitor_column *columns; size_t n_columns; - /* Contains 'struct ovsdb_jsonrpc_monitor_row's for rows that have been + /* Contains 'struct ovsdb_monitor_row's for rows that have been * updated but not yet flushed to the jsonrpc connection. */ struct hmap changes; }; +struct ovsdb_jsonrpc_monitor; +/* Backend monitor. + * + * ovsdb_monitor keep track of the ovsdb changes. + */ /* A collection of tables being monitored. */ -struct ovsdb_jsonrpc_monitor { +struct ovsdb_monitor { struct ovsdb_replica replica; + struct shash tables; /* Holds "struct ovsdb_monitor_table"s. 
*/ + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor; +}; + +/* Jsonrpc front end monitor. */ +struct ovsdb_jsonrpc_monitor { struct ovsdb_jsonrpc_session *session; struct ovsdb *db; struct hmap_node node; /* In ovsdb_jsonrpc_session's "monitors". */ struct json *monitor_id; - struct shash tables; /* Holds "struct ovsdb_jsonrpc_monitor_table"s. */ + struct ovsdb_monitor *dbmon; }; static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; struct ovsdb_jsonrpc_monitor *ovsdb_jsonrpc_monitor_find( struct ovsdb_jsonrpc_session *, const struct json *monitor_id); -static void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_replica *); +static void ovsdb_monitor_destroy(struct ovsdb_replica *); static struct json *ovsdb_jsonrpc_monitor_get_initial( const struct ovsdb_jsonrpc_monitor *); @@ -1110,12 +1121,12 @@ ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s, } static void -ovsdb_jsonrpc_add_monitor_column(struct ovsdb_jsonrpc_monitor_table *mt, - const struct ovsdb_column *column, - enum ovsdb_jsonrpc_monitor_selection select, - size_t *allocated_columns) +ovsdb_add_monitor_column(struct ovsdb_monitor_table *mt, + const struct ovsdb_column *column, + enum ovsdb_monitor_selection select, + size_t *allocated_columns) { - struct ovsdb_jsonrpc_monitor_column *c; + struct ovsdb_monitor_column *c; if (mt->n_columns >= *allocated_columns) { mt->columns = x2nrealloc(mt->columns, allocated_columns, @@ -1128,21 +1139,21 @@ ovsdb_jsonrpc_add_monitor_column(struct ovsdb_jsonrpc_monitor_table *mt, } static int -compare_ovsdb_jsonrpc_monitor_column(const void *a_, const void *b_) +compare_ovsdb_monitor_column(const void *a_, const void *b_) { - const struct ovsdb_jsonrpc_monitor_column *a = a_; - const struct ovsdb_jsonrpc_monitor_column *b = b_; + const struct ovsdb_monitor_column *a = a_; + const struct ovsdb_monitor_column *b = b_; return a->column < b->column ? 
-1 : a->column > b->column; } static struct ovsdb_error * OVS_WARN_UNUSED_RESULT -ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_jsonrpc_monitor_table *mt, +ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor_table *mt, const struct json *monitor_request, size_t *allocated_columns) { const struct ovsdb_table_schema *ts = mt->table->schema; - enum ovsdb_jsonrpc_monitor_selection select; + enum ovsdb_monitor_selection select; const struct json *columns, *select_json; struct ovsdb_parser parser; struct ovsdb_error *error; @@ -1203,8 +1214,7 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_jsonrpc_monitor_table *mt, return ovsdb_syntax_error(columns, NULL, "%s is not a valid " "column name", s); } - ovsdb_jsonrpc_add_monitor_column(mt, column, select, - allocated_columns); + ovsdb_add_monitor_column(mt, column, select, allocated_columns); } } else { struct shash_node *node; @@ -1212,8 +1222,8 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_jsonrpc_monitor_table *mt, SHASH_FOR_EACH (node, &ts->columns) { const struct ovsdb_column *column = node->data; if (column->index != OVSDB_COL_UUID) { - ovsdb_jsonrpc_add_monitor_column(mt, column, select, - allocated_columns); + ovsdb_add_monitor_column(mt, column, select, + allocated_columns); } } } @@ -1221,6 +1231,23 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_jsonrpc_monitor_table *mt, return NULL; } +static struct ovsdb_monitor * +ovsdb_monitor_create(struct ovsdb *db, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor, + const struct ovsdb_replica_class *replica_class) +{ + struct ovsdb_monitor *m; + + m = xzalloc(sizeof *m); + + ovsdb_replica_init(&m->replica, replica_class); + ovsdb_add_replica(db, &m->replica); + m->jsonrpc_monitor = jsonrpc_monitor; + shash_init(&m->tables); + + return m; +} + static struct jsonrpc_msg * ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, struct json *params, @@ -1250,17 +1277,15 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session 
*s, struct ovsdb *db, } m = xzalloc(sizeof *m); - ovsdb_replica_init(&m->replica, &ovsdb_jsonrpc_replica_class); - ovsdb_add_replica(db, &m->replica); m->session = s; m->db = db; + m->dbmon = ovsdb_monitor_create(db, m, &ovsdb_jsonrpc_replica_class); hmap_insert(&s->monitors, &m->node, json_hash(monitor_id, 0)); m->monitor_id = json_clone(monitor_id); - shash_init(&m->tables); SHASH_FOR_EACH (node, json_object(monitor_requests)) { const struct ovsdb_table *table; - struct ovsdb_jsonrpc_monitor_table *mt; + struct ovsdb_monitor_table *mt; size_t allocated_columns; const struct json *mr_value; size_t i; @@ -1275,7 +1300,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, mt = xzalloc(sizeof *mt); mt->table = table; hmap_init(&mt->changes); - shash_add(&m->tables, table->schema->name, mt); + shash_add(&m->dbmon->tables, table->schema->name, mt); /* Parse columns. */ mr_value = node->data; @@ -1300,7 +1325,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, /* Check for duplicate columns. 
*/ qsort(mt->columns, mt->n_columns, sizeof *mt->columns, - compare_ovsdb_jsonrpc_monitor_column); + compare_ovsdb_monitor_column); for (i = 1; i < mt->n_columns; i++) { if (mt->columns[i].column == mt->columns[i - 1].column) { error = ovsdb_syntax_error(mr_value, NULL, "column %s " @@ -1316,7 +1341,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, error: if (m) { - ovsdb_remove_replica(m->db, &m->replica); + ovsdb_remove_replica(m->db, &m->dbmon->replica); } json = ovsdb_error_to_json(error); @@ -1340,7 +1365,7 @@ ovsdb_jsonrpc_monitor_cancel(struct ovsdb_jsonrpc_session *s, return jsonrpc_create_error(json_string_create("unknown monitor"), request_id); } else { - ovsdb_remove_replica(m->db, &m->replica); + ovsdb_remove_replica(m->db, &m->dbmon->replica); return jsonrpc_create_reply(json_object_create(), request_id); } } @@ -1352,29 +1377,29 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) struct ovsdb_jsonrpc_monitor *m, *next; HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { - ovsdb_remove_replica(m->db, &m->replica); + ovsdb_remove_replica(m->db, &m->dbmon->replica); } } -static struct ovsdb_jsonrpc_monitor * -ovsdb_jsonrpc_monitor_cast(struct ovsdb_replica *replica) +static struct ovsdb_monitor * +ovsdb_monitor_cast(struct ovsdb_replica *replica) { ovs_assert(replica->class == &ovsdb_jsonrpc_replica_class); - return CONTAINER_OF(replica, struct ovsdb_jsonrpc_monitor, replica); + return CONTAINER_OF(replica, struct ovsdb_monitor, replica); } -struct ovsdb_jsonrpc_monitor_aux { - const struct ovsdb_jsonrpc_monitor *monitor; - struct ovsdb_jsonrpc_monitor_table *mt; +struct ovsdb_monitor_aux { + const struct ovsdb_monitor *monitor; + struct ovsdb_monitor_table *mt; }; -/* Finds and returns the ovsdb_jsonrpc_monitor_row in 'mt->changes' for the +/* Finds and returns the ovsdb_monitor_row in 'mt->changes' for the * given 'uuid', or NULL if there is no such row. 
*/ -static struct ovsdb_jsonrpc_monitor_row * -ovsdb_jsonrpc_monitor_row_find(const struct ovsdb_jsonrpc_monitor_table *mt, - const struct uuid *uuid) +static struct ovsdb_monitor_row * +ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, + const struct uuid *uuid) { - struct ovsdb_jsonrpc_monitor_row *row; + struct ovsdb_monitor_row *row; HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &mt->changes) { if (uuid_equals(uuid, &row->uuid)) { @@ -1390,7 +1415,7 @@ ovsdb_jsonrpc_monitor_row_find(const struct ovsdb_jsonrpc_monitor_table *mt, * * If 'row' is NULL, returns NULL. */ static struct ovsdb_datum * -clone_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, +clone_monitor_row_data(const struct ovsdb_monitor_table *mt, const struct ovsdb_row *row) { struct ovsdb_datum *data; @@ -1415,7 +1440,7 @@ clone_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, /* Replaces the mt->n_columns ovsdb_datums in row[] by copies of the data from * in 'row' drawn from the columns represented by mt->columns[]. */ static void -update_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, +update_monitor_row_data(const struct ovsdb_monitor_table *mt, const struct ovsdb_row *row, struct ovsdb_datum *data) { @@ -1437,7 +1462,7 @@ update_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, /* Frees all of the mt->n_columns ovsdb_datums in data[], using the types taken * from mt->columns[], plus 'data' itself. */ static void -free_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, +free_monitor_row_data(const struct ovsdb_monitor_table *mt, struct ovsdb_datum *data) { if (data) { @@ -1454,8 +1479,8 @@ free_monitor_row_data(const struct ovsdb_jsonrpc_monitor_table *mt, /* Frees 'row', which must have been created from 'mt'. 
*/ static void -ovsdb_jsonrpc_monitor_row_destroy(const struct ovsdb_jsonrpc_monitor_table *mt, - struct ovsdb_jsonrpc_monitor_row *row) +ovsdb_monitor_row_destroy(const struct ovsdb_monitor_table *mt, + struct ovsdb_monitor_row *row) { if (row) { free_monitor_row_data(mt, row->old); @@ -1465,17 +1490,17 @@ ovsdb_jsonrpc_monitor_row_destroy(const struct ovsdb_jsonrpc_monitor_table *mt, } static bool -ovsdb_jsonrpc_monitor_change_cb(const struct ovsdb_row *old, - const struct ovsdb_row *new, - const unsigned long int *changed OVS_UNUSED, - void *aux_) +ovsdb_monitor_change_cb(const struct ovsdb_row *old, + const struct ovsdb_row *new, + const unsigned long int *changed OVS_UNUSED, + void *aux_) { - struct ovsdb_jsonrpc_monitor_aux *aux = aux_; - const struct ovsdb_jsonrpc_monitor *m = aux->monitor; + struct ovsdb_monitor_aux *aux = aux_; + const struct ovsdb_monitor *m = aux->monitor; struct ovsdb_table *table = new ? new->table : old->table; const struct uuid *uuid = ovsdb_row_get_uuid(new ? new : old); - struct ovsdb_jsonrpc_monitor_row *change; - struct ovsdb_jsonrpc_monitor_table *mt; + struct ovsdb_monitor_row *change; + struct ovsdb_monitor_table *mt; if (!aux->mt || table != aux->mt->table) { aux->mt = shash_find_data(&m->tables, table->schema->name); @@ -1487,7 +1512,7 @@ ovsdb_jsonrpc_monitor_change_cb(const struct ovsdb_row *old, } mt = aux->mt; - change = ovsdb_jsonrpc_monitor_row_find(mt, uuid); + change = ovsdb_monitor_row_find(mt, uuid); if (!change) { change = xmalloc(sizeof *change); hmap_insert(&mt->changes, &change->hmap_node, uuid_hash(uuid)); @@ -1521,12 +1546,12 @@ ovsdb_jsonrpc_monitor_change_cb(const struct ovsdb_row *old, * 'changed' must be a scratch buffer for internal use that is at least * bitmap_n_bytes(mt->n_columns) bytes long. 
*/ static struct json * -ovsdb_jsonrpc_monitor_compose_row_update( - const struct ovsdb_jsonrpc_monitor_table *mt, - const struct ovsdb_jsonrpc_monitor_row *row, +ovsdb_monitor_compose_row_update( + const struct ovsdb_monitor_table *mt, + const struct ovsdb_monitor_row *row, bool initial, unsigned long int *changed) { - enum ovsdb_jsonrpc_monitor_selection type; + enum ovsdb_monitor_selection type; struct json *old_json, *new_json; struct json *row_json; size_t i; @@ -1569,7 +1594,7 @@ ovsdb_jsonrpc_monitor_compose_row_update( json_object_put(row_json, "new", new_json); } for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_jsonrpc_monitor_column *c = &mt->columns[i]; + const struct ovsdb_monitor_column *c = &mt->columns[i]; if (!(type & c->select)) { /* We don't care about this type of change for this @@ -1612,23 +1637,23 @@ ovsdb_jsonrpc_monitor_compose_table_update( size_t max_columns; max_columns = 0; - SHASH_FOR_EACH (node, &monitor->tables) { - struct ovsdb_jsonrpc_monitor_table *mt = node->data; + SHASH_FOR_EACH (node, &monitor->dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; max_columns = MAX(max_columns, mt->n_columns); } changed = xmalloc(bitmap_n_bytes(max_columns)); json = NULL; - SHASH_FOR_EACH (node, &monitor->tables) { - struct ovsdb_jsonrpc_monitor_table *mt = node->data; - struct ovsdb_jsonrpc_monitor_row *row, *next; + SHASH_FOR_EACH (node, &monitor->dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + struct ovsdb_monitor_row *row, *next; struct json *table_json = NULL; HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { struct json *row_json; - row_json = ovsdb_jsonrpc_monitor_compose_row_update( + row_json = ovsdb_monitor_compose_row_update( mt, row, initial, changed); if (row_json) { char uuid[UUID_LEN + 1]; @@ -1650,7 +1675,7 @@ ovsdb_jsonrpc_monitor_compose_table_update( } hmap_remove(&mt->changes, &row->hmap_node); - ovsdb_jsonrpc_monitor_row_destroy(mt, row); + ovsdb_monitor_row_destroy(mt, row); } 
} @@ -1667,8 +1692,8 @@ ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) HMAP_FOR_EACH (m, node, &s->monitors) { struct shash_node *node; - SHASH_FOR_EACH (node, &m->tables) { - struct ovsdb_jsonrpc_monitor_table *mt = node->data; + SHASH_FOR_EACH (node, &m->dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; if (!hmap_is_empty(&mt->changes)) { return true; @@ -1700,23 +1725,23 @@ ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *s) } static void -ovsdb_jsonrpc_monitor_init_aux(struct ovsdb_jsonrpc_monitor_aux *aux, - const struct ovsdb_jsonrpc_monitor *m) +ovsdb_monitor_init_aux(struct ovsdb_monitor_aux *aux, + const struct ovsdb_monitor *m) { aux->monitor = m; aux->mt = NULL; } static struct ovsdb_error * -ovsdb_jsonrpc_monitor_commit(struct ovsdb_replica *replica, - const struct ovsdb_txn *txn, - bool durable OVS_UNUSED) +ovsdb_monitor_commit(struct ovsdb_replica *replica, + const struct ovsdb_txn *txn, + bool durable OVS_UNUSED) { - struct ovsdb_jsonrpc_monitor *m = ovsdb_jsonrpc_monitor_cast(replica); - struct ovsdb_jsonrpc_monitor_aux aux; + struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); + struct ovsdb_monitor_aux aux; - ovsdb_jsonrpc_monitor_init_aux(&aux, m); - ovsdb_txn_for_each_change(txn, ovsdb_jsonrpc_monitor_change_cb, &aux); + ovsdb_monitor_init_aux(&aux, m); + ovsdb_txn_for_each_change(txn, ovsdb_monitor_change_cb, &aux); return NULL; } @@ -1724,19 +1749,19 @@ ovsdb_jsonrpc_monitor_commit(struct ovsdb_replica *replica, static struct json * ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m) { - struct ovsdb_jsonrpc_monitor_aux aux; + struct ovsdb_monitor_aux aux; struct shash_node *node; struct json *json; - ovsdb_jsonrpc_monitor_init_aux(&aux, m); - SHASH_FOR_EACH (node, &m->tables) { - struct ovsdb_jsonrpc_monitor_table *mt = node->data; + ovsdb_monitor_init_aux(&aux, m->dbmon); + SHASH_FOR_EACH (node, &m->dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; if 
(mt->select & OJMS_INITIAL) { struct ovsdb_row *row; HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { - ovsdb_jsonrpc_monitor_change_cb(NULL, row, NULL, &aux); + ovsdb_monitor_change_cb(NULL, row, NULL, &aux); } } } @@ -1745,19 +1770,20 @@ ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m) } static void -ovsdb_jsonrpc_monitor_destroy(struct ovsdb_replica *replica) +ovsdb_monitor_destroy(struct ovsdb_replica *replica) { - struct ovsdb_jsonrpc_monitor *m = ovsdb_jsonrpc_monitor_cast(replica); + struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor = m->jsonrpc_monitor; struct shash_node *node; - json_destroy(m->monitor_id); + json_destroy(jsonrpc_monitor->monitor_id); SHASH_FOR_EACH (node, &m->tables) { - struct ovsdb_jsonrpc_monitor_table *mt = node->data; - struct ovsdb_jsonrpc_monitor_row *row, *next; + struct ovsdb_monitor_table *mt = node->data; + struct ovsdb_monitor_row *row, *next; HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { hmap_remove(&mt->changes, &row->hmap_node); - ovsdb_jsonrpc_monitor_row_destroy(mt, row); + ovsdb_monitor_row_destroy(mt, row); } hmap_destroy(&mt->changes); @@ -1765,11 +1791,12 @@ ovsdb_jsonrpc_monitor_destroy(struct ovsdb_replica *replica) free(mt); } shash_destroy(&m->tables); - hmap_remove(&m->session->monitors, &m->node); + hmap_remove(&jsonrpc_monitor->session->monitors, &jsonrpc_monitor->node); + free(jsonrpc_monitor); free(m); } static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { - ovsdb_jsonrpc_monitor_commit, - ovsdb_jsonrpc_monitor_destroy + ovsdb_monitor_commit, + ovsdb_monitor_destroy }; From f3395ab3226b29b6e5460706b066189a40983595 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 18:02:53 -0700 Subject: [PATCH 074/146] jsonrpc-server: make setting mt->select into its own functions To make ovsdb_monitor an opaque object to the ovsdb_jsonrpc server. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 76755856c..a70ed383a 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1147,6 +1147,13 @@ compare_ovsdb_monitor_column(const void *a_, const void *b_) return a->column < b->column ? -1 : a->column > b->column; } +static void +ovsdb_monitor_add_select(struct ovsdb_monitor_table *mt, + enum ovsdb_monitor_selection select) +{ + mt->select |= select; +} + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor_table *mt, const struct json *monitor_request, @@ -1189,8 +1196,8 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor_table *mt, } else { select = OJMS_INITIAL | OJMS_INSERT | OJMS_DELETE | OJMS_MODIFY; } - mt->select |= select; + ovsdb_monitor_add_select(mt, select); if (columns) { size_t i; From 83d300f6afecf29d63938b55265a74f5563a315d Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 18:57:55 -0700 Subject: [PATCH 075/146] jsonrpc-server: refactor ovsdb_jsonrpc_parse_monitor_request Change ovsdb_jsonrpc_parse_monitor_request() to make ovsdb_monitor_table an opaque object. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 73 +++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index a70ed383a..5375357a3 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1155,15 +1155,17 @@ ovsdb_monitor_add_select(struct ovsdb_monitor_table *mt, } static struct ovsdb_error * OVS_WARN_UNUSED_RESULT -ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor_table *mt, +ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, const struct json *monitor_request, size_t *allocated_columns) { - const struct ovsdb_table_schema *ts = mt->table->schema; + const struct ovsdb_table_schema *ts = table->schema; enum ovsdb_monitor_selection select; const struct json *columns, *select_json; struct ovsdb_parser parser; struct ovsdb_error *error; + struct ovsdb_monitor_table *mt; ovsdb_parser_init(&parser, monitor_request, "table %s", ts->name); columns = ovsdb_parser_member(&parser, "columns", OP_ARRAY | OP_OPTIONAL); @@ -1197,6 +1199,7 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor_table *mt, select = OJMS_INITIAL | OJMS_INSERT | OJMS_DELETE | OJMS_MODIFY; } + mt = shash_find_data(&dbmon->tables, table->schema->name); ovsdb_monitor_add_select(mt, select); if (columns) { size_t i; @@ -1255,6 +1258,44 @@ ovsdb_monitor_create(struct ovsdb *db, return m; } +static void +ovsdb_monitor_add_table(struct ovsdb_monitor *m, + const struct ovsdb_table *table) +{ + struct ovsdb_monitor_table *mt; + + mt = xzalloc(sizeof *mt); + mt->table = table; + hmap_init(&mt->changes); + shash_add(&m->tables, table->schema->name, mt); +} + +/* Check for duplicated column names. Return the first + * duplicated column's name if found. Otherwise return + * NULL. 
*/ +static const char * OVS_WARN_UNUSED_RESULT +ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *m, + const struct ovsdb_table *table) +{ + struct ovsdb_monitor_table *mt; + int i; + + mt = shash_find_data(&m->tables, table->schema->name); + + if (mt) { + /* Check for duplicate columns. */ + qsort(mt->columns, mt->n_columns, sizeof *mt->columns, + compare_ovsdb_monitor_column); + for (i = 1; i < mt->n_columns; i++) { + if (mt->columns[i].column == mt->columns[i - 1].column) { + return mt->columns[i].column->name; + } + } + } + + return NULL; +} + static struct jsonrpc_msg * ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, struct json *params, @@ -1292,7 +1333,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, SHASH_FOR_EACH (node, json_object(monitor_requests)) { const struct ovsdb_table *table; - struct ovsdb_monitor_table *mt; + const char *column_name; size_t allocated_columns; const struct json *mr_value; size_t i; @@ -1304,10 +1345,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, goto error; } - mt = xzalloc(sizeof *mt); - mt->table = table; - hmap_init(&mt->changes); - shash_add(&m->dbmon->tables, table->schema->name, mt); + ovsdb_monitor_add_table(m->dbmon, table); /* Parse columns. */ mr_value = node->data; @@ -1317,29 +1355,26 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, for (i = 0; i < array->n; i++) { error = ovsdb_jsonrpc_parse_monitor_request( - mt, array->elems[i], &allocated_columns); + m->dbmon, table, array->elems[i], &allocated_columns); if (error) { goto error; } } } else { error = ovsdb_jsonrpc_parse_monitor_request( - mt, mr_value, &allocated_columns); + m->dbmon, table, mr_value, &allocated_columns); if (error) { goto error; } } - /* Check for duplicate columns. 
*/ - qsort(mt->columns, mt->n_columns, sizeof *mt->columns, - compare_ovsdb_monitor_column); - for (i = 1; i < mt->n_columns; i++) { - if (mt->columns[i].column == mt->columns[i - 1].column) { - error = ovsdb_syntax_error(mr_value, NULL, "column %s " - "mentioned more than once", - mt->columns[i].column->name); - goto error; - } + column_name = ovsdb_monitor_table_check_duplicates(m->dbmon, table); + + if (column_name) { + error = ovsdb_syntax_error(mr_value, NULL, "column %s " + "mentioned more than once", + column_name); + goto error; } } From ea585a0e24f0d31ee593091045a4c6d008aba231 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 19:05:49 -0700 Subject: [PATCH 076/146] jsonrpc-server: refactor ovsdb_monitor_add_column() To hide the ovsdb_monitor_table object from the ovsdb_jsonrpc server. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 5375357a3..f510c6848 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1121,13 +1121,17 @@ ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s, } static void -ovsdb_add_monitor_column(struct ovsdb_monitor_table *mt, +ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, const struct ovsdb_column *column, enum ovsdb_monitor_selection select, size_t *allocated_columns) { + struct ovsdb_monitor_table *mt; struct ovsdb_monitor_column *c; + mt = shash_find_data(&dbmon->tables, table->schema->name); + if (mt->n_columns >= *allocated_columns) { mt->columns = x2nrealloc(mt->columns, allocated_columns, sizeof *mt->columns); @@ -1148,9 +1152,12 @@ compare_ovsdb_monitor_column(const void *a_, const void *b_) } static void -ovsdb_monitor_add_select(struct ovsdb_monitor_table *mt, - enum ovsdb_monitor_selection select) +ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, + const struct ovsdb_table 
*table, + enum ovsdb_monitor_selection select) { + struct ovsdb_monitor_table * mt; + mt = shash_find_data(&dbmon->tables, table->schema->name); mt->select |= select; } @@ -1165,7 +1172,6 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, const struct json *columns, *select_json; struct ovsdb_parser parser; struct ovsdb_error *error; - struct ovsdb_monitor_table *mt; ovsdb_parser_init(&parser, monitor_request, "table %s", ts->name); columns = ovsdb_parser_member(&parser, "columns", OP_ARRAY | OP_OPTIONAL); @@ -1199,8 +1205,7 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, select = OJMS_INITIAL | OJMS_INSERT | OJMS_DELETE | OJMS_MODIFY; } - mt = shash_find_data(&dbmon->tables, table->schema->name); - ovsdb_monitor_add_select(mt, select); + ovsdb_monitor_table_add_select(dbmon, table, select); if (columns) { size_t i; @@ -1219,12 +1224,13 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, } s = columns->u.array.elems[i]->u.string; - column = shash_find_data(&mt->table->schema->columns, s); + column = shash_find_data(&table->schema->columns, s); if (!column) { return ovsdb_syntax_error(columns, NULL, "%s is not a valid " "column name", s); } - ovsdb_add_monitor_column(mt, column, select, allocated_columns); + ovsdb_monitor_add_column(dbmon, table, column, select, + allocated_columns); } } else { struct shash_node *node; @@ -1232,7 +1238,7 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, SHASH_FOR_EACH (node, &ts->columns) { const struct ovsdb_column *column = node->data; if (column->index != OVSDB_COL_UUID) { - ovsdb_add_monitor_column(mt, column, select, + ovsdb_monitor_add_column(dbmon, table, column, select, allocated_columns); } } From 92d5d643898fd8b9ea6503f9286351747e681c15 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 19:28:13 -0700 Subject: [PATCH 077/146] jsonrpc-server: refactoring ovsdb_jsonrpc_monitor_compose_table_update() Now it simply calls 
ovsdb_monitor_compose_table_update(), which actually creates the json object. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index f510c6848..93773885f 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1676,8 +1676,8 @@ ovsdb_monitor_compose_row_update( * be used as part of the initial reply to a "monitor" request, false if it is * going to be used as part of an "update" notification. */ static struct json * -ovsdb_jsonrpc_monitor_compose_table_update( - const struct ovsdb_jsonrpc_monitor *monitor, bool initial) +ovsdb_monitor_compose_table_update( + const struct ovsdb_monitor *dbmon, bool initial) { struct shash_node *node; unsigned long int *changed; @@ -1685,7 +1685,7 @@ ovsdb_jsonrpc_monitor_compose_table_update( size_t max_columns; max_columns = 0; - SHASH_FOR_EACH (node, &monitor->dbmon->tables) { + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; max_columns = MAX(max_columns, mt->n_columns); @@ -1693,7 +1693,7 @@ ovsdb_jsonrpc_monitor_compose_table_update( changed = xmalloc(bitmap_n_bytes(max_columns)); json = NULL; - SHASH_FOR_EACH (node, &monitor->dbmon->tables) { + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; struct ovsdb_monitor_row *row, *next; struct json *table_json = NULL; @@ -1732,6 +1732,13 @@ ovsdb_jsonrpc_monitor_compose_table_update( return json; } +static struct json * +ovsdb_jsonrpc_monitor_compose_table_update( + const struct ovsdb_jsonrpc_monitor *monitor, bool initial) +{ + return ovsdb_monitor_compose_table_update(monitor->dbmon, initial); +} + static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) { From dbc1cfbb2818b4ead18716d41e452a1b0742c70e Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 19:33:46 -0700 Subject: [PATCH 078/146] jsonrpc-server: 
refactoring ovsdb_jsonrpc_monitor_needs_flush split out per monitoring needs_flush() into ovsdb_monitor_needs_flush(). Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 93773885f..23aa3fd69 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1739,20 +1739,29 @@ ovsdb_jsonrpc_monitor_compose_table_update( return ovsdb_monitor_compose_table_update(monitor->dbmon, initial); } +static bool +ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon) +{ + struct shash_node *node; + + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + + if (!hmap_is_empty(&mt->changes)) { + return true; + } + } + return false; +} + static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) { struct ovsdb_jsonrpc_monitor *m; HMAP_FOR_EACH (m, node, &s->monitors) { - struct shash_node *node; - - SHASH_FOR_EACH (node, &m->dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - - if (!hmap_is_empty(&mt->changes)) { - return true; - } + if (ovsdb_monitor_needs_flush(m->dbmon)) { + return true; } } From 51df26a68e709f357d4ee24eebfba1aacb758eb9 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 19:38:50 -0700 Subject: [PATCH 079/146] jsonrpc-server: rename ovsdb_jsonrpc_monitor_get_initial() rename ovsdb_jsonrpc_monitor_get_initial() to ovsdb_monitor_get_initial() Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 23aa3fd69..df495b0aa 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1093,8 +1093,8 @@ static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; struct ovsdb_jsonrpc_monitor *ovsdb_jsonrpc_monitor_find( struct ovsdb_jsonrpc_session *, const 
struct json *monitor_id); static void ovsdb_monitor_destroy(struct ovsdb_replica *); -static struct json *ovsdb_jsonrpc_monitor_get_initial( - const struct ovsdb_jsonrpc_monitor *); +static struct json *ovsdb_monitor_get_initial( + const struct ovsdb_monitor *); static bool parse_bool(struct ovsdb_parser *parser, const char *name, bool default_value) @@ -1384,7 +1384,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, } } - return jsonrpc_create_reply(ovsdb_jsonrpc_monitor_get_initial(m), + return jsonrpc_create_reply(ovsdb_monitor_get_initial(m->dbmon), request_id); error: @@ -1811,14 +1811,14 @@ ovsdb_monitor_commit(struct ovsdb_replica *replica, } static struct json * -ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m) +ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) { struct ovsdb_monitor_aux aux; struct shash_node *node; struct json *json; - ovsdb_monitor_init_aux(&aux, m->dbmon); - SHASH_FOR_EACH (node, &m->dbmon->tables) { + ovsdb_monitor_init_aux(&aux, dbmon); + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; if (mt->select & OJMS_INITIAL) { @@ -1829,7 +1829,7 @@ ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m) } } } - json = ovsdb_jsonrpc_monitor_compose_table_update(m, true); + json = ovsdb_monitor_compose_table_update(dbmon, true); return json ? json : json_object_create(); } From 88b633082c0de646bebf1966bd06299692e4f9ba Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 19:49:13 -0700 Subject: [PATCH 080/146] jsonrpc-server: refactoring ovsdb_monitor_destroy() Add ovsdb_monitor_destroy() function to properly cleanup ovsdb_monitor. It is also responsible for unhook from the replica chain. The replica destroy callback is now called ovsdb_monitor_destroy_callback() Minor variable renaming in ovsdb_monitor_create() to make it more consistent. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 59 +++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index df495b0aa..6bbda5296 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -43,6 +43,7 @@ VLOG_DEFINE_THIS_MODULE(ovsdb_jsonrpc_server); struct ovsdb_jsonrpc_remote; struct ovsdb_jsonrpc_session; +struct ovsdb_jsonrpc_monitor; /* Message rate-limiting. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -81,6 +82,7 @@ static void ovsdb_jsonrpc_trigger_complete_done( static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_create( struct ovsdb_jsonrpc_session *, struct ovsdb *, struct json *params, const struct json *request_id); +static void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *); static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_cancel( struct ovsdb_jsonrpc_session *, struct json_array *params, @@ -1092,7 +1094,7 @@ static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; struct ovsdb_jsonrpc_monitor *ovsdb_jsonrpc_monitor_find( struct ovsdb_jsonrpc_session *, const struct json *monitor_id); -static void ovsdb_monitor_destroy(struct ovsdb_replica *); +static void ovsdb_monitor_destroy(struct ovsdb_monitor *); static struct json *ovsdb_monitor_get_initial( const struct ovsdb_monitor *); @@ -1252,16 +1254,16 @@ ovsdb_monitor_create(struct ovsdb *db, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor, const struct ovsdb_replica_class *replica_class) { - struct ovsdb_monitor *m; + struct ovsdb_monitor *dbmon; - m = xzalloc(sizeof *m); + dbmon = xzalloc(sizeof *dbmon); - ovsdb_replica_init(&m->replica, replica_class); - ovsdb_add_replica(db, &m->replica); - m->jsonrpc_monitor = jsonrpc_monitor; - shash_init(&m->tables); + ovsdb_replica_init(&dbmon->replica, replica_class); + ovsdb_add_replica(db, &dbmon->replica); + dbmon->jsonrpc_monitor = jsonrpc_monitor; + 
shash_init(&dbmon->tables); - return m; + return dbmon; } static void @@ -1389,7 +1391,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, error: if (m) { - ovsdb_remove_replica(m->db, &m->dbmon->replica); + ovsdb_jsonrpc_monitor_destroy(m); } json = ovsdb_error_to_json(error); @@ -1413,7 +1415,7 @@ ovsdb_jsonrpc_monitor_cancel(struct ovsdb_jsonrpc_session *s, return jsonrpc_create_error(json_string_create("unknown monitor"), request_id); } else { - ovsdb_remove_replica(m->db, &m->dbmon->replica); + ovsdb_jsonrpc_monitor_destroy(m); return jsonrpc_create_reply(json_object_create(), request_id); } } @@ -1425,7 +1427,7 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) struct ovsdb_jsonrpc_monitor *m, *next; HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { - ovsdb_remove_replica(m->db, &m->dbmon->replica); + ovsdb_jsonrpc_monitor_destroy(m); } } @@ -1834,14 +1836,22 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) } static void -ovsdb_monitor_destroy(struct ovsdb_replica *replica) +ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *m) +{ + json_destroy(m->monitor_id); + hmap_remove(&m->session->monitors, &m->node); + ovsdb_monitor_destroy(m->dbmon); + free(m); +} + +static void +ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) { - struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); - struct ovsdb_jsonrpc_monitor *jsonrpc_monitor = m->jsonrpc_monitor; struct shash_node *node; - json_destroy(jsonrpc_monitor->monitor_id); - SHASH_FOR_EACH (node, &m->tables) { + list_remove(&dbmon->replica.node); + + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; struct ovsdb_monitor_row *row, *next; @@ -1854,13 +1864,20 @@ ovsdb_monitor_destroy(struct ovsdb_replica *replica) free(mt->columns); free(mt); } - shash_destroy(&m->tables); - hmap_remove(&jsonrpc_monitor->session->monitors, &jsonrpc_monitor->node); - free(jsonrpc_monitor); - free(m); + 
shash_destroy(&dbmon->tables); + free(dbmon); +} + +static void +ovsdb_monitor_destroy_callback(struct ovsdb_replica *replica) +{ + struct ovsdb_monitor *dbmon = ovsdb_monitor_cast(replica); + struct ovsdb_jsonrpc_monitor *m = dbmon->jsonrpc_monitor; + + ovsdb_jsonrpc_monitor_destroy(m); } static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { ovsdb_monitor_commit, - ovsdb_monitor_destroy + ovsdb_monitor_destroy_callback, }; From 2fa1df7b88611f8858882147b5db122c5644799c Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 11 Mar 2015 21:20:44 -0700 Subject: [PATCH 081/146] jsonrpc-server: Split out monitor backend functions to monitor.c/h Added new files monitor.[ch] for monitor backend functions. There are no functional changes. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/automake.mk | 2 + ovsdb/jsonrpc-server.c | 593 ++--------------------------------------- ovsdb/jsonrpc-server.h | 3 + ovsdb/monitor.c | 581 ++++++++++++++++++++++++++++++++++++++++ ovsdb/monitor.h | 58 ++++ 5 files changed, 661 insertions(+), 576 deletions(-) create mode 100644 ovsdb/monitor.c create mode 100644 ovsdb/monitor.h diff --git a/ovsdb/automake.mk b/ovsdb/automake.mk index a66974a57..cc3e656dc 100644 --- a/ovsdb/automake.mk +++ b/ovsdb/automake.mk @@ -20,6 +20,8 @@ ovsdb_libovsdb_la_SOURCES = \ ovsdb/mutation.h \ ovsdb/ovsdb.c \ ovsdb/ovsdb.h \ + ovsdb/monitor.c \ + ovsdb/monitor.h \ ovsdb/query.c \ ovsdb/query.h \ ovsdb/row.c \ diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 6bbda5296..55148971a 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -37,13 +37,13 @@ #include "timeval.h" #include "transaction.h" #include "trigger.h" +#include "monitor.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_jsonrpc_server); struct ovsdb_jsonrpc_remote; struct ovsdb_jsonrpc_session; -struct ovsdb_jsonrpc_monitor; /* Message rate-limiting.
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -82,7 +82,6 @@ static void ovsdb_jsonrpc_trigger_complete_done( static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_create( struct ovsdb_jsonrpc_session *, struct ovsdb *, struct json *params, const struct json *request_id); -static void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *); static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_cancel( struct ovsdb_jsonrpc_session *, struct json_array *params, @@ -1028,86 +1027,16 @@ ovsdb_jsonrpc_trigger_complete_done(struct ovsdb_jsonrpc_session *s) } } -/* database table monitors. */ - -enum ovsdb_monitor_selection { - OJMS_INITIAL = 1 << 0, /* All rows when monitor is created. */ - OJMS_INSERT = 1 << 1, /* New rows. */ - OJMS_DELETE = 1 << 2, /* Deleted rows. */ - OJMS_MODIFY = 1 << 3 /* Modified rows. */ -}; - -/* A particular column being monitored. */ -struct ovsdb_monitor_column { - const struct ovsdb_column *column; - enum ovsdb_monitor_selection select; -}; - -/* A row that has changed in a monitored table. */ -struct ovsdb_monitor_row { - struct hmap_node hmap_node; /* In ovsdb_jsonrpc_monitor_table.changes. */ - struct uuid uuid; /* UUID of row that changed. */ - struct ovsdb_datum *old; /* Old data, NULL for an inserted row. */ - struct ovsdb_datum *new; /* New data, NULL for a deleted row. */ -}; - -/* A particular table being monitored. */ -struct ovsdb_monitor_table { - const struct ovsdb_table *table; - - /* This is the union (bitwise-OR) of the 'select' values in all of the - * members of 'columns' below. */ - enum ovsdb_monitor_selection select; - - /* Columns being monitored. */ - struct ovsdb_monitor_column *columns; - size_t n_columns; - - /* Contains 'struct ovsdb_monitor_row's for rows that have been - * updated but not yet flushed to the jsonrpc connection. */ - struct hmap changes; -}; - -struct ovsdb_jsonrpc_monitor; -/* Backend monitor. - * - * ovsdb_monitor keep track of the ovsdb changes. 
- */ -/* A collection of tables being monitored. */ -struct ovsdb_monitor { - struct ovsdb_replica replica; - struct shash tables; /* Holds "struct ovsdb_monitor_table"s. */ - struct ovsdb_jsonrpc_monitor *jsonrpc_monitor; -}; - /* Jsonrpc front end monitor. */ struct ovsdb_jsonrpc_monitor { struct ovsdb_jsonrpc_session *session; struct ovsdb *db; struct hmap_node node; /* In ovsdb_jsonrpc_session's "monitors". */ - struct json *monitor_id; struct ovsdb_monitor *dbmon; }; -static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; - -struct ovsdb_jsonrpc_monitor *ovsdb_jsonrpc_monitor_find( - struct ovsdb_jsonrpc_session *, const struct json *monitor_id); -static void ovsdb_monitor_destroy(struct ovsdb_monitor *); -static struct json *ovsdb_monitor_get_initial( - const struct ovsdb_monitor *); - -static bool -parse_bool(struct ovsdb_parser *parser, const char *name, bool default_value) -{ - const struct json *json; - - json = ovsdb_parser_member(parser, name, OP_BOOLEAN | OP_OPTIONAL); - return json ? 
json_boolean(json) : default_value; -} - -struct ovsdb_jsonrpc_monitor * +static struct ovsdb_jsonrpc_monitor * ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s, const struct json *monitor_id) { @@ -1122,45 +1051,13 @@ ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s, return NULL; } -static void -ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, - const struct ovsdb_table *table, - const struct ovsdb_column *column, - enum ovsdb_monitor_selection select, - size_t *allocated_columns) +static bool +parse_bool(struct ovsdb_parser *parser, const char *name, bool default_value) { - struct ovsdb_monitor_table *mt; - struct ovsdb_monitor_column *c; + const struct json *json; - mt = shash_find_data(&dbmon->tables, table->schema->name); - - if (mt->n_columns >= *allocated_columns) { - mt->columns = x2nrealloc(mt->columns, allocated_columns, - sizeof *mt->columns); - } - - c = &mt->columns[mt->n_columns++]; - c->column = column; - c->select = select; -} - -static int -compare_ovsdb_monitor_column(const void *a_, const void *b_) -{ - const struct ovsdb_monitor_column *a = a_; - const struct ovsdb_monitor_column *b = b_; - - return a->column < b->column ? -1 : a->column > b->column; -} - -static void -ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, - const struct ovsdb_table *table, - enum ovsdb_monitor_selection select) -{ - struct ovsdb_monitor_table * mt; - mt = shash_find_data(&dbmon->tables, table->schema->name); - mt->select |= select; + json = ovsdb_parser_member(parser, name, OP_BOOLEAN | OP_OPTIONAL); + return json ? 
json_boolean(json) : default_value; } static struct ovsdb_error * OVS_WARN_UNUSED_RESULT @@ -1249,61 +1146,6 @@ ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_monitor *dbmon, return NULL; } -static struct ovsdb_monitor * -ovsdb_monitor_create(struct ovsdb *db, - struct ovsdb_jsonrpc_monitor *jsonrpc_monitor, - const struct ovsdb_replica_class *replica_class) -{ - struct ovsdb_monitor *dbmon; - - dbmon = xzalloc(sizeof *dbmon); - - ovsdb_replica_init(&dbmon->replica, replica_class); - ovsdb_add_replica(db, &dbmon->replica); - dbmon->jsonrpc_monitor = jsonrpc_monitor; - shash_init(&dbmon->tables); - - return dbmon; -} - -static void -ovsdb_monitor_add_table(struct ovsdb_monitor *m, - const struct ovsdb_table *table) -{ - struct ovsdb_monitor_table *mt; - - mt = xzalloc(sizeof *mt); - mt->table = table; - hmap_init(&mt->changes); - shash_add(&m->tables, table->schema->name, mt); -} - -/* Check for duplicated column names. Return the first - * duplicated column's name if found. Otherwise return - * NULL. */ -static const char * OVS_WARN_UNUSED_RESULT -ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *m, - const struct ovsdb_table *table) -{ - struct ovsdb_monitor_table *mt; - int i; - - mt = shash_find_data(&m->tables, table->schema->name); - - if (mt) { - /* Check for duplicate columns. 
*/ - qsort(mt->columns, mt->n_columns, sizeof *mt->columns, - compare_ovsdb_monitor_column); - for (i = 1; i < mt->n_columns; i++) { - if (mt->columns[i].column == mt->columns[i - 1].column) { - return mt->columns[i].column->name; - } - } - } - - return NULL; -} - static struct jsonrpc_msg * ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, struct json *params, @@ -1335,7 +1177,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, m = xzalloc(sizeof *m); m->session = s; m->db = db; - m->dbmon = ovsdb_monitor_create(db, m, &ovsdb_jsonrpc_replica_class); + m->dbmon = ovsdb_monitor_create(db, m); hmap_insert(&s->monitors, &m->node, json_hash(monitor_id, 0)); m->monitor_id = json_clone(monitor_id); @@ -1431,309 +1273,6 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) } } -static struct ovsdb_monitor * -ovsdb_monitor_cast(struct ovsdb_replica *replica) -{ - ovs_assert(replica->class == &ovsdb_jsonrpc_replica_class); - return CONTAINER_OF(replica, struct ovsdb_monitor, replica); -} - -struct ovsdb_monitor_aux { - const struct ovsdb_monitor *monitor; - struct ovsdb_monitor_table *mt; -}; - -/* Finds and returns the ovsdb_monitor_row in 'mt->changes' for the - * given 'uuid', or NULL if there is no such row. */ -static struct ovsdb_monitor_row * -ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, - const struct uuid *uuid) -{ - struct ovsdb_monitor_row *row; - - HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &mt->changes) { - if (uuid_equals(uuid, &row->uuid)) { - return row; - } - } - return NULL; -} - -/* Allocates an array of 'mt->n_columns' ovsdb_datums and initializes them as - * copies of the data in 'row' drawn from the columns represented by - * mt->columns[]. Returns the array. - * - * If 'row' is NULL, returns NULL. 
*/ -static struct ovsdb_datum * -clone_monitor_row_data(const struct ovsdb_monitor_table *mt, - const struct ovsdb_row *row) -{ - struct ovsdb_datum *data; - size_t i; - - if (!row) { - return NULL; - } - - data = xmalloc(mt->n_columns * sizeof *data); - for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_column *c = mt->columns[i].column; - const struct ovsdb_datum *src = &row->fields[c->index]; - struct ovsdb_datum *dst = &data[i]; - const struct ovsdb_type *type = &c->type; - - ovsdb_datum_clone(dst, src, type); - } - return data; -} - -/* Replaces the mt->n_columns ovsdb_datums in row[] by copies of the data from - * in 'row' drawn from the columns represented by mt->columns[]. */ -static void -update_monitor_row_data(const struct ovsdb_monitor_table *mt, - const struct ovsdb_row *row, - struct ovsdb_datum *data) -{ - size_t i; - - for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_column *c = mt->columns[i].column; - const struct ovsdb_datum *src = &row->fields[c->index]; - struct ovsdb_datum *dst = &data[i]; - const struct ovsdb_type *type = &c->type; - - if (!ovsdb_datum_equals(src, dst, type)) { - ovsdb_datum_destroy(dst, type); - ovsdb_datum_clone(dst, src, type); - } - } -} - -/* Frees all of the mt->n_columns ovsdb_datums in data[], using the types taken - * from mt->columns[], plus 'data' itself. */ -static void -free_monitor_row_data(const struct ovsdb_monitor_table *mt, - struct ovsdb_datum *data) -{ - if (data) { - size_t i; - - for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_column *c = mt->columns[i].column; - - ovsdb_datum_destroy(&data[i], &c->type); - } - free(data); - } -} - -/* Frees 'row', which must have been created from 'mt'. 
*/ -static void -ovsdb_monitor_row_destroy(const struct ovsdb_monitor_table *mt, - struct ovsdb_monitor_row *row) -{ - if (row) { - free_monitor_row_data(mt, row->old); - free_monitor_row_data(mt, row->new); - free(row); - } -} - -static bool -ovsdb_monitor_change_cb(const struct ovsdb_row *old, - const struct ovsdb_row *new, - const unsigned long int *changed OVS_UNUSED, - void *aux_) -{ - struct ovsdb_monitor_aux *aux = aux_; - const struct ovsdb_monitor *m = aux->monitor; - struct ovsdb_table *table = new ? new->table : old->table; - const struct uuid *uuid = ovsdb_row_get_uuid(new ? new : old); - struct ovsdb_monitor_row *change; - struct ovsdb_monitor_table *mt; - - if (!aux->mt || table != aux->mt->table) { - aux->mt = shash_find_data(&m->tables, table->schema->name); - if (!aux->mt) { - /* We don't care about rows in this table at all. Tell the caller - * to skip it. */ - return false; - } - } - mt = aux->mt; - - change = ovsdb_monitor_row_find(mt, uuid); - if (!change) { - change = xmalloc(sizeof *change); - hmap_insert(&mt->changes, &change->hmap_node, uuid_hash(uuid)); - change->uuid = *uuid; - change->old = clone_monitor_row_data(mt, old); - change->new = clone_monitor_row_data(mt, new); - } else { - if (new) { - update_monitor_row_data(mt, new, change->new); - } else { - free_monitor_row_data(mt, change->new); - change->new = NULL; - - if (!change->old) { - /* This row was added then deleted. Forget about it. */ - hmap_remove(&mt->changes, &change->hmap_node); - free(change); - } - } - } - return true; -} - -/* Returns JSON for a (as described in RFC 7047) for 'row' within - * 'mt', or NULL if no row update should be sent. - * - * The caller should specify 'initial' as true if the returned JSON is going to - * be used as part of the initial reply to a "monitor" request, false if it is - * going to be used as part of an "update" notification. 
- * - * 'changed' must be a scratch buffer for internal use that is at least - * bitmap_n_bytes(mt->n_columns) bytes long. */ -static struct json * -ovsdb_monitor_compose_row_update( - const struct ovsdb_monitor_table *mt, - const struct ovsdb_monitor_row *row, - bool initial, unsigned long int *changed) -{ - enum ovsdb_monitor_selection type; - struct json *old_json, *new_json; - struct json *row_json; - size_t i; - - type = (initial ? OJMS_INITIAL - : !row->old ? OJMS_INSERT - : !row->new ? OJMS_DELETE - : OJMS_MODIFY); - if (!(mt->select & type)) { - return NULL; - } - - if (type == OJMS_MODIFY) { - size_t n_changes; - - n_changes = 0; - memset(changed, 0, bitmap_n_bytes(mt->n_columns)); - for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_column *c = mt->columns[i].column; - if (!ovsdb_datum_equals(&row->old[i], &row->new[i], &c->type)) { - bitmap_set1(changed, i); - n_changes++; - } - } - if (!n_changes) { - /* No actual changes: presumably a row changed and then - * changed back later. */ - return NULL; - } - } - - row_json = json_object_create(); - old_json = new_json = NULL; - if (type & (OJMS_DELETE | OJMS_MODIFY)) { - old_json = json_object_create(); - json_object_put(row_json, "old", old_json); - } - if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { - new_json = json_object_create(); - json_object_put(row_json, "new", new_json); - } - for (i = 0; i < mt->n_columns; i++) { - const struct ovsdb_monitor_column *c = &mt->columns[i]; - - if (!(type & c->select)) { - /* We don't care about this type of change for this - * particular column (but we will care about it for some - * other column). 
*/ - continue; - } - - if ((type == OJMS_MODIFY && bitmap_is_set(changed, i)) - || type == OJMS_DELETE) { - json_object_put(old_json, c->column->name, - ovsdb_datum_to_json(&row->old[i], - &c->column->type)); - } - if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { - json_object_put(new_json, c->column->name, - ovsdb_datum_to_json(&row->new[i], - &c->column->type)); - } - } - - return row_json; -} - -/* Constructs and returns JSON for a object (as described in - * RFC 7047) for all the outstanding changes within 'monitor', and deletes all - * the outstanding changes from 'monitor'. Returns NULL if no update needs to - * be sent. - * - * The caller should specify 'initial' as true if the returned JSON is going to - * be used as part of the initial reply to a "monitor" request, false if it is - * going to be used as part of an "update" notification. */ -static struct json * -ovsdb_monitor_compose_table_update( - const struct ovsdb_monitor *dbmon, bool initial) -{ - struct shash_node *node; - unsigned long int *changed; - struct json *json; - size_t max_columns; - - max_columns = 0; - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - - max_columns = MAX(max_columns, mt->n_columns); - } - changed = xmalloc(bitmap_n_bytes(max_columns)); - - json = NULL; - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - struct ovsdb_monitor_row *row, *next; - struct json *table_json = NULL; - - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { - struct json *row_json; - - row_json = ovsdb_monitor_compose_row_update( - mt, row, initial, changed); - if (row_json) { - char uuid[UUID_LEN + 1]; - - /* Create JSON object for transaction overall. */ - if (!json) { - json = json_object_create(); - } - - /* Create JSON object for transaction on this table. 
*/ - if (!table_json) { - table_json = json_object_create(); - json_object_put(json, mt->table->schema->name, table_json); - } - - /* Add JSON row to JSON table. */ - snprintf(uuid, sizeof uuid, UUID_FMT, UUID_ARGS(&row->uuid)); - json_object_put(table_json, uuid, row_json); - } - - hmap_remove(&mt->changes, &row->hmap_node); - ovsdb_monitor_row_destroy(mt, row); - } - } - - free(changed); - - return json; -} - static struct json * ovsdb_jsonrpc_monitor_compose_table_update( const struct ovsdb_jsonrpc_monitor *monitor, bool initial) @@ -1741,21 +1280,6 @@ ovsdb_jsonrpc_monitor_compose_table_update( return ovsdb_monitor_compose_table_update(monitor->dbmon, initial); } -static bool -ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon) -{ - struct shash_node *node; - - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - - if (!hmap_is_empty(&mt->changes)) { - return true; - } - } - return false; -} - static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) { @@ -1770,6 +1294,15 @@ ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) return false; } +void +ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *m) +{ + json_destroy(m->monitor_id); + hmap_remove(&m->session->monitors, &m->node); + ovsdb_monitor_destroy(m->dbmon); + free(m); +} + static void ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *s) { @@ -1789,95 +1322,3 @@ ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *s) } } } - -static void -ovsdb_monitor_init_aux(struct ovsdb_monitor_aux *aux, - const struct ovsdb_monitor *m) -{ - aux->monitor = m; - aux->mt = NULL; -} - -static struct ovsdb_error * -ovsdb_monitor_commit(struct ovsdb_replica *replica, - const struct ovsdb_txn *txn, - bool durable OVS_UNUSED) -{ - struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); - struct ovsdb_monitor_aux aux; - - ovsdb_monitor_init_aux(&aux, m); - ovsdb_txn_for_each_change(txn, ovsdb_monitor_change_cb, &aux); - - 
return NULL; -} - -static struct json * -ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) -{ - struct ovsdb_monitor_aux aux; - struct shash_node *node; - struct json *json; - - ovsdb_monitor_init_aux(&aux, dbmon); - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - - if (mt->select & OJMS_INITIAL) { - struct ovsdb_row *row; - - HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { - ovsdb_monitor_change_cb(NULL, row, NULL, &aux); - } - } - } - json = ovsdb_monitor_compose_table_update(dbmon, true); - return json ? json : json_object_create(); -} - -static void -ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *m) -{ - json_destroy(m->monitor_id); - hmap_remove(&m->session->monitors, &m->node); - ovsdb_monitor_destroy(m->dbmon); - free(m); -} - -static void -ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) -{ - struct shash_node *node; - - list_remove(&dbmon->replica.node); - - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - struct ovsdb_monitor_row *row, *next; - - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { - hmap_remove(&mt->changes, &row->hmap_node); - ovsdb_monitor_row_destroy(mt, row); - } - hmap_destroy(&mt->changes); - - free(mt->columns); - free(mt); - } - shash_destroy(&dbmon->tables); - free(dbmon); -} - -static void -ovsdb_monitor_destroy_callback(struct ovsdb_replica *replica) -{ - struct ovsdb_monitor *dbmon = ovsdb_monitor_cast(replica); - struct ovsdb_jsonrpc_monitor *m = dbmon->jsonrpc_monitor; - - ovsdb_jsonrpc_monitor_destroy(m); -} - -static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { - ovsdb_monitor_commit, - ovsdb_monitor_destroy_callback, -}; diff --git a/ovsdb/jsonrpc-server.h b/ovsdb/jsonrpc-server.h index e6a1642a5..fce8b7bd0 100644 --- a/ovsdb/jsonrpc-server.h +++ b/ovsdb/jsonrpc-server.h @@ -69,4 +69,7 @@ void ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *); void 
ovsdb_jsonrpc_server_get_memory_usage(const struct ovsdb_jsonrpc_server *, struct simap *usage); +struct ovsdb_jsonrpc_monitor; +void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *); + #endif /* ovsdb/jsonrpc-server.h */ diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c new file mode 100644 index 000000000..a1aeb5f51 --- /dev/null +++ b/ovsdb/monitor.c @@ -0,0 +1,581 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "bitmap.h" +#include "column.h" +#include "dynamic-string.h" +#include "json.h" +#include "jsonrpc.h" +#include "ovsdb-error.h" +#include "ovsdb-parser.h" +#include "ovsdb.h" +#include "row.h" +#include "simap.h" +#include "table.h" +#include "timeval.h" +#include "transaction.h" +#include "jsonrpc-server.h" +#include "monitor.h" +#include "openvswitch/vlog.h" + + +static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; + +/* Backend monitor. + * + * ovsdb_monitor keep track of the ovsdb changes. + */ + +/* A collection of tables being monitored. */ +struct ovsdb_monitor { + struct ovsdb_replica replica; + struct shash tables; /* Holds "struct ovsdb_monitor_table"s. */ + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor; + struct ovsdb *db; +}; + +/* A particular column being monitored. 
*/ +struct ovsdb_monitor_column { + const struct ovsdb_column *column; + enum ovsdb_monitor_selection select; +}; + +/* A row that has changed in a monitored table. */ +struct ovsdb_monitor_row { + struct hmap_node hmap_node; /* In ovsdb_jsonrpc_monitor_table.changes. */ + struct uuid uuid; /* UUID of row that changed. */ + struct ovsdb_datum *old; /* Old data, NULL for an inserted row. */ + struct ovsdb_datum *new; /* New data, NULL for a deleted row. */ +}; + +/* A particular table being monitored. */ +struct ovsdb_monitor_table { + const struct ovsdb_table *table; + + /* This is the union (bitwise-OR) of the 'select' values in all of the + * members of 'columns' below. */ + enum ovsdb_monitor_selection select; + + /* Columns being monitored. */ + struct ovsdb_monitor_column *columns; + size_t n_columns; + + /* Contains 'struct ovsdb_monitor_row's for rows that have been + * updated but not yet flushed to the jsonrpc connection. */ + struct hmap changes; +}; + +static int +compare_ovsdb_monitor_column(const void *a_, const void *b_) +{ + const struct ovsdb_monitor_column *a = a_; + const struct ovsdb_monitor_column *b = b_; + + return a->column < b->column ? -1 : a->column > b->column; +} + +static struct ovsdb_monitor * +ovsdb_monitor_cast(struct ovsdb_replica *replica) +{ + ovs_assert(replica->class == &ovsdb_jsonrpc_replica_class); + return CONTAINER_OF(replica, struct ovsdb_monitor, replica); +} + +/* Finds and returns the ovsdb_monitor_row in 'mt->changes' for the + * given 'uuid', or NULL if there is no such row. 
*/ +static struct ovsdb_monitor_row * +ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, + const struct uuid *uuid) +{ + struct ovsdb_monitor_row *row; + + HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &mt->changes) { + if (uuid_equals(uuid, &row->uuid)) { + return row; + } + } + return NULL; +} + +/* Allocates an array of 'mt->n_columns' ovsdb_datums and initializes them as + * copies of the data in 'row' drawn from the columns represented by + * mt->columns[]. Returns the array. + * + * If 'row' is NULL, returns NULL. */ +static struct ovsdb_datum * +clone_monitor_row_data(const struct ovsdb_monitor_table *mt, + const struct ovsdb_row *row) +{ + struct ovsdb_datum *data; + size_t i; + + if (!row) { + return NULL; + } + + data = xmalloc(mt->n_columns * sizeof *data); + for (i = 0; i < mt->n_columns; i++) { + const struct ovsdb_column *c = mt->columns[i].column; + const struct ovsdb_datum *src = &row->fields[c->index]; + struct ovsdb_datum *dst = &data[i]; + const struct ovsdb_type *type = &c->type; + + ovsdb_datum_clone(dst, src, type); + } + return data; +} + +/* Replaces the mt->n_columns ovsdb_datums in row[] by copies of the data from + * in 'row' drawn from the columns represented by mt->columns[]. */ +static void +update_monitor_row_data(const struct ovsdb_monitor_table *mt, + const struct ovsdb_row *row, + struct ovsdb_datum *data) +{ + size_t i; + + for (i = 0; i < mt->n_columns; i++) { + const struct ovsdb_column *c = mt->columns[i].column; + const struct ovsdb_datum *src = &row->fields[c->index]; + struct ovsdb_datum *dst = &data[i]; + const struct ovsdb_type *type = &c->type; + + if (!ovsdb_datum_equals(src, dst, type)) { + ovsdb_datum_destroy(dst, type); + ovsdb_datum_clone(dst, src, type); + } + } +} + +/* Frees all of the mt->n_columns ovsdb_datums in data[], using the types taken + * from mt->columns[], plus 'data' itself. 
*/ +static void +free_monitor_row_data(const struct ovsdb_monitor_table *mt, + struct ovsdb_datum *data) +{ + if (data) { + size_t i; + + for (i = 0; i < mt->n_columns; i++) { + const struct ovsdb_column *c = mt->columns[i].column; + + ovsdb_datum_destroy(&data[i], &c->type); + } + free(data); + } +} + +/* Frees 'row', which must have been created from 'mt'. */ +static void +ovsdb_monitor_row_destroy(const struct ovsdb_monitor_table *mt, + struct ovsdb_monitor_row *row) +{ + if (row) { + free_monitor_row_data(mt, row->old); + free_monitor_row_data(mt, row->new); + free(row); + } +} + +struct ovsdb_monitor * +ovsdb_monitor_create(struct ovsdb *db, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) +{ + struct ovsdb_monitor *dbmon; + + dbmon = xzalloc(sizeof *dbmon); + + ovsdb_replica_init(&dbmon->replica, &ovsdb_jsonrpc_replica_class); + ovsdb_add_replica(db, &dbmon->replica); + dbmon->jsonrpc_monitor = jsonrpc_monitor; + dbmon->db = db; + shash_init(&dbmon->tables); + + return dbmon; +} + +void +ovsdb_monitor_add_table(struct ovsdb_monitor *m, + const struct ovsdb_table *table) +{ + struct ovsdb_monitor_table *mt; + + mt = xzalloc(sizeof *mt); + mt->table = table; + hmap_init(&mt->changes); + shash_add(&m->tables, table->schema->name, mt); +} + +void +ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, + const struct ovsdb_column *column, + enum ovsdb_monitor_selection select, + size_t *allocated_columns) +{ + struct ovsdb_monitor_table *mt; + struct ovsdb_monitor_column *c; + + mt = shash_find_data(&dbmon->tables, table->schema->name); + + if (mt->n_columns >= *allocated_columns) { + mt->columns = x2nrealloc(mt->columns, allocated_columns, + sizeof *mt->columns); + } + + mt->select |= select; + c = &mt->columns[mt->n_columns++]; + c->column = column; + c->select = select; +} + +/* Check for duplicated column names. Return the first + * duplicated column's name if found. Otherwise return + * NULL. 
*/ +const char * OVS_WARN_UNUSED_RESULT +ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *m, + const struct ovsdb_table *table) +{ + struct ovsdb_monitor_table *mt; + int i; + + mt = shash_find_data(&m->tables, table->schema->name); + + if (mt) { + /* Check for duplicate columns. */ + qsort(mt->columns, mt->n_columns, sizeof *mt->columns, + compare_ovsdb_monitor_column); + for (i = 1; i < mt->n_columns; i++) { + if (mt->columns[i].column == mt->columns[i - 1].column) { + return mt->columns[i].column->name; + } + } + } + + return NULL; +} + +/* Returns JSON for a (as described in RFC 7047) for 'row' within + * 'mt', or NULL if no row update should be sent. + * + * The caller should specify 'initial' as true if the returned JSON is going to + * be used as part of the initial reply to a "monitor" request, false if it is + * going to be used as part of an "update" notification. + * + * 'changed' must be a scratch buffer for internal use that is at least + * bitmap_n_bytes(mt->n_columns) bytes long. */ +static struct json * +ovsdb_monitor_compose_row_update( + const struct ovsdb_monitor_table *mt, + const struct ovsdb_monitor_row *row, + bool initial, unsigned long int *changed) +{ + enum ovsdb_monitor_selection type; + struct json *old_json, *new_json; + struct json *row_json; + size_t i; + + type = (initial ? OJMS_INITIAL + : !row->old ? OJMS_INSERT + : !row->new ? OJMS_DELETE + : OJMS_MODIFY); + if (!(mt->select & type)) { + return NULL; + } + + if (type == OJMS_MODIFY) { + size_t n_changes; + + n_changes = 0; + memset(changed, 0, bitmap_n_bytes(mt->n_columns)); + for (i = 0; i < mt->n_columns; i++) { + const struct ovsdb_column *c = mt->columns[i].column; + if (!ovsdb_datum_equals(&row->old[i], &row->new[i], &c->type)) { + bitmap_set1(changed, i); + n_changes++; + } + } + if (!n_changes) { + /* No actual changes: presumably a row changed and then + * changed back later. 
*/ + return NULL; + } + } + + row_json = json_object_create(); + old_json = new_json = NULL; + if (type & (OJMS_DELETE | OJMS_MODIFY)) { + old_json = json_object_create(); + json_object_put(row_json, "old", old_json); + } + if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { + new_json = json_object_create(); + json_object_put(row_json, "new", new_json); + } + for (i = 0; i < mt->n_columns; i++) { + const struct ovsdb_monitor_column *c = &mt->columns[i]; + + if (!(type & c->select)) { + /* We don't care about this type of change for this + * particular column (but we will care about it for some + * other column). */ + continue; + } + + if ((type == OJMS_MODIFY && bitmap_is_set(changed, i)) + || type == OJMS_DELETE) { + json_object_put(old_json, c->column->name, + ovsdb_datum_to_json(&row->old[i], + &c->column->type)); + } + if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { + json_object_put(new_json, c->column->name, + ovsdb_datum_to_json(&row->new[i], + &c->column->type)); + } + } + + return row_json; +} + +/* Constructs and returns JSON for a object (as described in + * RFC 7047) for all the outstanding changes within 'monitor', and deletes all + * the outstanding changes from 'monitor'. Returns NULL if no update needs to + * be sent. + * + * The caller should specify 'initial' as true if the returned JSON is going to + * be used as part of the initial reply to a "monitor" request, false if it is + * going to be used as part of an "update" notification. 
*/ +struct json * +ovsdb_monitor_compose_table_update( + const struct ovsdb_monitor *dbmon, bool initial) +{ + struct shash_node *node; + unsigned long int *changed; + struct json *json; + size_t max_columns; + + max_columns = 0; + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + + max_columns = MAX(max_columns, mt->n_columns); + } + changed = xmalloc(bitmap_n_bytes(max_columns)); + + json = NULL; + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + struct ovsdb_monitor_row *row, *next; + struct json *table_json = NULL; + + HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { + struct json *row_json; + + row_json = ovsdb_monitor_compose_row_update( + mt, row, initial, changed); + if (row_json) { + char uuid[UUID_LEN + 1]; + + /* Create JSON object for transaction overall. */ + if (!json) { + json = json_object_create(); + } + + /* Create JSON object for transaction on this table. */ + if (!table_json) { + table_json = json_object_create(); + json_object_put(json, mt->table->schema->name, table_json); + } + + /* Add JSON row to JSON table. 
*/ + snprintf(uuid, sizeof uuid, UUID_FMT, UUID_ARGS(&row->uuid)); + json_object_put(table_json, uuid, row_json); + } + + hmap_remove(&mt->changes, &row->hmap_node); + ovsdb_monitor_row_destroy(mt, row); + } + } + + free(changed); + return json; +} + +bool +ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon) +{ + struct shash_node *node; + + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + + if (!hmap_is_empty(&mt->changes)) { + return true; + } + } + return false; +} + +void +ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, + enum ovsdb_monitor_selection select) +{ + struct ovsdb_monitor_table * mt; + + mt = shash_find_data(&dbmon->tables, table->schema->name); + mt->select |= select; +} + +struct ovsdb_monitor_aux { + const struct ovsdb_monitor *monitor; + struct ovsdb_monitor_table *mt; +}; + +static void +ovsdb_monitor_init_aux(struct ovsdb_monitor_aux *aux, + const struct ovsdb_monitor *m) +{ + aux->monitor = m; + aux->mt = NULL; +} + +static bool +ovsdb_monitor_change_cb(const struct ovsdb_row *old, + const struct ovsdb_row *new, + const unsigned long int *changed OVS_UNUSED, + void *aux_) +{ + struct ovsdb_monitor_aux *aux = aux_; + const struct ovsdb_monitor *m = aux->monitor; + struct ovsdb_table *table = new ? new->table : old->table; + const struct uuid *uuid = ovsdb_row_get_uuid(new ? new : old); + struct ovsdb_monitor_row *change; + struct ovsdb_monitor_table *mt; + + if (!aux->mt || table != aux->mt->table) { + aux->mt = shash_find_data(&m->tables, table->schema->name); + if (!aux->mt) { + /* We don't care about rows in this table at all. Tell the caller + * to skip it. 
*/ + return false; + } + } + mt = aux->mt; + + change = ovsdb_monitor_row_find(mt, uuid); + if (!change) { + change = xmalloc(sizeof *change); + hmap_insert(&mt->changes, &change->hmap_node, uuid_hash(uuid)); + change->uuid = *uuid; + change->old = clone_monitor_row_data(mt, old); + change->new = clone_monitor_row_data(mt, new); + } else { + if (new) { + update_monitor_row_data(mt, new, change->new); + } else { + free_monitor_row_data(mt, change->new); + change->new = NULL; + + if (!change->old) { + /* This row was added then deleted. Forget about it. */ + hmap_remove(&mt->changes, &change->hmap_node); + free(change); + } + } + } + return true; +} + +struct json * +ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) +{ + struct ovsdb_monitor_aux aux; + struct shash_node *node; + struct json *json; + + ovsdb_monitor_init_aux(&aux, dbmon); + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + + if (mt->select & OJMS_INITIAL) { + struct ovsdb_row *row; + + HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { + ovsdb_monitor_change_cb(NULL, row, NULL, &aux); + } + } + } + json = ovsdb_monitor_compose_table_update(dbmon, true); + return json ? 
json : json_object_create(); +} + +void +ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) +{ + struct shash_node *node; + + list_remove(&dbmon->replica.node); + + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; + struct ovsdb_monitor_row *row, *next; + + HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { + hmap_remove(&mt->changes, &row->hmap_node); + ovsdb_monitor_row_destroy(mt, row); + } + hmap_destroy(&mt->changes); + + free(mt->columns); + free(mt); + } + shash_destroy(&dbmon->tables); + free(dbmon); +} + +static struct ovsdb_error * +ovsdb_monitor_commit(struct ovsdb_replica *replica, + const struct ovsdb_txn *txn, + bool durable OVS_UNUSED) +{ + struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); + struct ovsdb_monitor_aux aux; + + ovsdb_monitor_init_aux(&aux, m); + ovsdb_txn_for_each_change(txn, ovsdb_monitor_change_cb, &aux); + + return NULL; +} + +static void +ovsdb_monitor_destroy_callback(struct ovsdb_replica *replica) +{ + struct ovsdb_monitor *dbmon = ovsdb_monitor_cast(replica); + struct ovsdb_jsonrpc_monitor *m = dbmon->jsonrpc_monitor; + + ovsdb_jsonrpc_monitor_destroy(m); +} + +static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { + ovsdb_monitor_commit, + ovsdb_monitor_destroy_callback, +}; diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h new file mode 100644 index 000000000..82c9a578b --- /dev/null +++ b/ovsdb/monitor.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OVSDB_MONITOR_H +#define OVSDB_MONITOR_H + +struct ovsdb_monitor; + +enum ovsdb_monitor_selection { + OJMS_INITIAL = 1 << 0, /* All rows when monitor is created. */ + OJMS_INSERT = 1 << 1, /* New rows. */ + OJMS_DELETE = 1 << 2, /* Deleted rows. */ + OJMS_MODIFY = 1 << 3 /* Modified rows. */ +}; + + +struct ovsdb_monitor *ovsdb_monitor_create(struct ovsdb *db, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); + +void ovsdb_monitor_add_table(struct ovsdb_monitor *m, + const struct ovsdb_table *table); + +void ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, + const struct ovsdb_column *column, + enum ovsdb_monitor_selection select, + size_t *allocated_columns); + +const char * OVS_WARN_UNUSED_RESULT +ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *, + const struct ovsdb_table *); + +struct json *ovsdb_monitor_compose_table_update( + const struct ovsdb_monitor *dbmon, bool initial); + +void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, + const struct ovsdb_table *table, + enum ovsdb_monitor_selection select); + +bool ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon); + +struct json *ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon); + +void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); +#endif From 61b63013e6074478d264d20c2db0edf17e57a0ff Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 17 Mar 2015 21:25:20 -0700 Subject: [PATCH 082/146] ovsdb-monitor: refactoring ovsdb_monitor_get_initial Refactoring ovsdb_monitor_get_initial() to not generate JSON object. It only collect changes within the ovsdb_monitor(). ovsdb_jsonrpc_monitor_compose_table_update() is then used to generate JSON object. This change will also make future patch easier. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 9 +++++++-- ovsdb/monitor.c | 5 +---- ovsdb/monitor.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 55148971a..1d57600d1 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -89,6 +89,9 @@ static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_cancel( static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *); static void ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *); static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *); +static struct json *ovsdb_jsonrpc_monitor_compose_table_update( + const struct ovsdb_jsonrpc_monitor *monitor, bool initial); + /* JSON-RPC database server. */ @@ -1228,8 +1231,10 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, } } - return jsonrpc_create_reply(ovsdb_monitor_get_initial(m->dbmon), - request_id); + ovsdb_monitor_get_initial(m->dbmon); + json = ovsdb_jsonrpc_monitor_compose_table_update(m, true); + json = json ? json : json_object_create(); + return jsonrpc_create_reply(json, request_id); error: if (m) { diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index a1aeb5f51..95299cebb 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -505,12 +505,11 @@ ovsdb_monitor_change_cb(const struct ovsdb_row *old, return true; } -struct json * +void ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) { struct ovsdb_monitor_aux aux; struct shash_node *node; - struct json *json; ovsdb_monitor_init_aux(&aux, dbmon); SHASH_FOR_EACH (node, &dbmon->tables) { @@ -524,8 +523,6 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) } } } - json = ovsdb_monitor_compose_table_update(dbmon, true); - return json ? 
json : json_object_create(); } void diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index 82c9a578b..aefe1d5d7 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -52,7 +52,7 @@ void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, bool ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon); -struct json *ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon); +void ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon); void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); #endif From d941283758dffb8471b13469c75f3512e0df6417 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Thu, 9 Apr 2015 16:39:58 -0700 Subject: [PATCH 083/146] ovsdb-monitor: stores jsonrpc-monitor in a linked-list Currently, each ovsdb-monitor points to a single jsonrpc_monitor object. This means there is a 1:1 relationship between them. In case multiple jsonrpc-monitors need to monitor the same tables and the columns within them, they can share a single ovsdb-monitor, so the updates only need to be maintained once. This patch, with a few following patches, will allow for N:1 mapping between jsonrpc-monitor and ovsdb-monitor. Maintaining jsonrpc-monitor pointers in a linked-list is essential in allowing N:1 mapping. The ovsdb-monitor life cycle is now reference counted. An empty list means zero references. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 2 +- ovsdb/monitor.c | 50 ++++++++++++++++++++++++++++++++++++++---- ovsdb/monitor.h | 5 +++-- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 1d57600d1..93d5977bf 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1304,7 +1304,7 @@ ovsdb_jsonrpc_monitor_destroy(struct ovsdb_jsonrpc_monitor *m) { json_destroy(m->monitor_id); hmap_remove(&m->session->monitors, &m->node); - ovsdb_monitor_destroy(m->dbmon); + ovsdb_monitor_remove_jsonrpc_monitor(m->dbmon, m); free(m); } diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 95299cebb..db47ccd01 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -47,10 +47,15 @@ static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; struct ovsdb_monitor { struct ovsdb_replica replica; struct shash tables; /* Holds "struct ovsdb_monitor_table"s. */ - struct ovsdb_jsonrpc_monitor *jsonrpc_monitor; + struct ovs_list jsonrpc_monitors; /* Contains "jsonrpc_monitor_node"s. */ struct ovsdb *db; }; +struct jsonrpc_monitor_node { + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor; + struct ovs_list node; +}; + /* A particular column being monitored. 
*/ struct ovsdb_monitor_column { const struct ovsdb_column *column; @@ -82,6 +87,8 @@ struct ovsdb_monitor_table { struct hmap changes; }; +static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); + static int compare_ovsdb_monitor_column(const void *a_, const void *b_) { @@ -199,15 +206,20 @@ ovsdb_monitor_create(struct ovsdb *db, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) { struct ovsdb_monitor *dbmon; + struct jsonrpc_monitor_node *jm; dbmon = xzalloc(sizeof *dbmon); ovsdb_replica_init(&dbmon->replica, &ovsdb_jsonrpc_replica_class); ovsdb_add_replica(db, &dbmon->replica); - dbmon->jsonrpc_monitor = jsonrpc_monitor; + list_init(&dbmon->jsonrpc_monitors); dbmon->db = db; shash_init(&dbmon->tables); + jm = xzalloc(sizeof *jm); + jm->jsonrpc_monitor = jsonrpc_monitor; + list_push_back(&dbmon->jsonrpc_monitors, &jm->node); + return dbmon; } @@ -526,6 +538,31 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) } void +ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) +{ + struct jsonrpc_monitor_node *jm; + + /* Find and remove the jsonrpc monitor from the list. */ + LIST_FOR_EACH(jm, node, &dbmon->jsonrpc_monitors) { + if (jm->jsonrpc_monitor == jsonrpc_monitor) { + list_remove(&jm->node); + free(jm); + + /* Destroy ovsdb monitor if this is the last user. */ + if (list_is_empty(&dbmon->jsonrpc_monitors)) { + ovsdb_monitor_destroy(dbmon); + } + + return; + }; + } + + /* Should never reach here. jsonrpc_monitor should be on the list. */ + OVS_NOT_REACHED(); +} + +static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) { struct shash_node *node; @@ -567,9 +604,14 @@ static void ovsdb_monitor_destroy_callback(struct ovsdb_replica *replica) { struct ovsdb_monitor *dbmon = ovsdb_monitor_cast(replica); - struct ovsdb_jsonrpc_monitor *m = dbmon->jsonrpc_monitor; + struct jsonrpc_monitor_node *jm, *next; - ovsdb_jsonrpc_monitor_destroy(m); + /* Delete all front end monitors. 
Removing the last front + * end monitor will also destroy the corresponding 'ovsdb_monitor'. + * ovsdb monitor will also be destroied. */ + LIST_FOR_EACH_SAFE(jm, next, node, &dbmon->jsonrpc_monitors) { + ovsdb_jsonrpc_monitor_destroy(jm->jsonrpc_monitor); + } } static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index aefe1d5d7..a4324322f 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -30,6 +30,9 @@ enum ovsdb_monitor_selection { struct ovsdb_monitor *ovsdb_monitor_create(struct ovsdb *db, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); +void ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); + void ovsdb_monitor_add_table(struct ovsdb_monitor *m, const struct ovsdb_table *table); @@ -53,6 +56,4 @@ void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, bool ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon); void ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon); - -void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); #endif From 59c35e1198ee42887649297bb4fd8687c5005ac8 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Thu, 12 Mar 2015 23:15:56 -0700 Subject: [PATCH 084/146] ovsdb-monitor: add transaction ids With N:1 mappings, multiple jsonrpc server may be servicing the rpc connection at a different pace. ovsdb-monitor thus needs to maintain different change sets, depends on connection speed of each rpc connections. Connections servicing at the same speed can share the same change set. Transaction ID is an concept added to describe the change set. One possible view of the database state is a sequence of changes, more precisely, commits be applied to it in order, starting from an initial state, with commit 0. The logic can also be applied to the jsonrpc monitor; each change it pushes corresponds to commits between two transaction IDs. This patch introduces transaction IDs. 
For ovsdb-monitor, it maintains n_transactions, starting from 0. Each commit add 1 to the number. Jsonrpc maintains and 'unflushed' transaction number, corresponding to the next commit the remote has not seen. jsonrpc's job is simply to notice there are changes in the ovsdb-monitor that it is interested in, i.e. 'n_transactions' >= 'unflushed', get the changes in json format, and push them to the remote site. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 12 ++++++++---- ovsdb/monitor.c | 24 +++++++++++------------- ovsdb/monitor.h | 7 ++++--- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 93d5977bf..84e1bb487 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -90,7 +90,7 @@ static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *); static void ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *); static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *); static struct json *ovsdb_jsonrpc_monitor_compose_table_update( - const struct ovsdb_jsonrpc_monitor *monitor, bool initial); + struct ovsdb_jsonrpc_monitor *monitor, bool initial); /* JSON-RPC database server. */ @@ -1037,6 +1037,8 @@ struct ovsdb_jsonrpc_monitor { struct hmap_node node; /* In ovsdb_jsonrpc_session's "monitors". */ struct json *monitor_id; struct ovsdb_monitor *dbmon; + uint64_t unflushed; /* The first transaction that has not been + flushed to the jsonrpc remote client. 
*/ }; static struct ovsdb_jsonrpc_monitor * @@ -1181,6 +1183,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, m->session = s; m->db = db; m->dbmon = ovsdb_monitor_create(db, m); + m->unflushed = 0; hmap_insert(&s->monitors, &m->node, json_hash(monitor_id, 0)); m->monitor_id = json_clone(monitor_id); @@ -1280,9 +1283,10 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) static struct json * ovsdb_jsonrpc_monitor_compose_table_update( - const struct ovsdb_jsonrpc_monitor *monitor, bool initial) + struct ovsdb_jsonrpc_monitor *monitor, bool initial) { - return ovsdb_monitor_compose_table_update(monitor->dbmon, initial); + return ovsdb_monitor_compose_table_update(monitor->dbmon, initial, + &monitor->unflushed); } static bool @@ -1291,7 +1295,7 @@ ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *s) struct ovsdb_jsonrpc_monitor *m; HMAP_FOR_EACH (m, node, &s->monitors) { - if (ovsdb_monitor_needs_flush(m->dbmon)) { + if (ovsdb_monitor_needs_flush(m->dbmon, m->unflushed)) { return true; } } diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index db47ccd01..769ea3622 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -49,6 +49,7 @@ struct ovsdb_monitor { struct shash tables; /* Holds "struct ovsdb_monitor_table"s. */ struct ovs_list jsonrpc_monitors; /* Contains "jsonrpc_monitor_node"s. */ struct ovsdb *db; + uint64_t n_transactions; /* Count number of committed transactions. */ }; struct jsonrpc_monitor_node { @@ -214,6 +215,7 @@ ovsdb_monitor_create(struct ovsdb *db, ovsdb_add_replica(db, &dbmon->replica); list_init(&dbmon->jsonrpc_monitors); dbmon->db = db; + dbmon->n_transactions = 0; shash_init(&dbmon->tables); jm = xzalloc(sizeof *jm); @@ -376,14 +378,16 @@ ovsdb_monitor_compose_row_update( * be used as part of the initial reply to a "monitor" request, false if it is * going to be used as part of an "update" notification. 
*/ struct json * -ovsdb_monitor_compose_table_update( - const struct ovsdb_monitor *dbmon, bool initial) +ovsdb_monitor_compose_table_update(const struct ovsdb_monitor *dbmon, + bool initial, uint64_t *unflushed) { struct shash_node *node; unsigned long int *changed; struct json *json; size_t max_columns; + *unflushed = dbmon->n_transactions + 1; + max_columns = 0; SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; @@ -432,18 +436,11 @@ ovsdb_monitor_compose_table_update( } bool -ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon) +ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon, + uint64_t next_transaction) { - struct shash_node *node; - - SHASH_FOR_EACH (node, &dbmon->tables) { - struct ovsdb_monitor_table *mt = node->data; - - if (!hmap_is_empty(&mt->changes)) { - return true; - } - } - return false; + ovs_assert(next_transaction <= dbmon->n_transactions + 1); + return (next_transaction <= dbmon->n_transactions); } void @@ -596,6 +593,7 @@ ovsdb_monitor_commit(struct ovsdb_replica *replica, ovsdb_monitor_init_aux(&aux, m); ovsdb_txn_for_each_change(txn, ovsdb_monitor_change_cb, &aux); + m->n_transactions++; return NULL; } diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index a4324322f..4db36b2fd 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -46,14 +46,15 @@ const char * OVS_WARN_UNUSED_RESULT ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *, const struct ovsdb_table *); -struct json *ovsdb_monitor_compose_table_update( - const struct ovsdb_monitor *dbmon, bool initial); +struct json *ovsdb_monitor_compose_table_update(const struct ovsdb_monitor *dbmon, + bool initial, uint64_t *unflushed_transaction); void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, const struct ovsdb_table *table, enum ovsdb_monitor_selection select); -bool ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon); +bool ovsdb_monitor_needs_flush(struct ovsdb_monitor *dbmon, + uint64_t next_transaction); void 
ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon); #endif From f1de87bb2f568ad126e77e85746ce63376ff0bd5 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 13 Mar 2015 13:41:38 -0700 Subject: [PATCH 085/146] ovsdb-monitor: rename jsonrpc_monitor_compose_table_update() jsonrpc_monitor_compose_update() seems to fit better than jsonrpc_monitor_compose_table_update(), since it composes changes from all tables. Albeit the original one is named after the object described in RFC 7047. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 12 ++++++------ ovsdb/monitor.c | 4 ++-- ovsdb/monitor.h | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 84e1bb487..05aaf8718 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -89,7 +89,7 @@ static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_cancel( static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *); static void ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *); static bool ovsdb_jsonrpc_monitor_needs_flush(struct ovsdb_jsonrpc_session *); -static struct json *ovsdb_jsonrpc_monitor_compose_table_update( +static struct json *ovsdb_jsonrpc_monitor_compose_update( struct ovsdb_jsonrpc_monitor *monitor, bool initial); @@ -1235,7 +1235,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, } ovsdb_monitor_get_initial(m->dbmon); - json = ovsdb_jsonrpc_monitor_compose_table_update(m, true); + json = ovsdb_jsonrpc_monitor_compose_update(m, true); json = json ? 
json : json_object_create(); return jsonrpc_create_reply(json, request_id); @@ -1282,11 +1282,11 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) } static struct json * -ovsdb_jsonrpc_monitor_compose_table_update( +ovsdb_jsonrpc_monitor_compose_update( struct ovsdb_jsonrpc_monitor *monitor, bool initial) { - return ovsdb_monitor_compose_table_update(monitor->dbmon, initial, - &monitor->unflushed); + return ovsdb_monitor_compose_update(monitor->dbmon, initial, + &monitor->unflushed); } static bool @@ -1320,7 +1320,7 @@ ovsdb_jsonrpc_monitor_flush_all(struct ovsdb_jsonrpc_session *s) HMAP_FOR_EACH (m, node, &s->monitors) { struct json *json; - json = ovsdb_jsonrpc_monitor_compose_table_update(m, false); + json = ovsdb_jsonrpc_monitor_compose_update(m, false); if (json) { struct jsonrpc_msg *msg; struct json *params; diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 769ea3622..0898808b4 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -378,8 +378,8 @@ ovsdb_monitor_compose_row_update( * be used as part of the initial reply to a "monitor" request, false if it is * going to be used as part of an "update" notification. 
*/ struct json * -ovsdb_monitor_compose_table_update(const struct ovsdb_monitor *dbmon, - bool initial, uint64_t *unflushed) +ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, + bool initial, uint64_t *unflushed) { struct shash_node *node; unsigned long int *changed; diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index 4db36b2fd..5fcb346e0 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -46,8 +46,8 @@ const char * OVS_WARN_UNUSED_RESULT ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *, const struct ovsdb_table *); -struct json *ovsdb_monitor_compose_table_update(const struct ovsdb_monitor *dbmon, - bool initial, uint64_t *unflushed_transaction); +struct json *ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, + bool initial, uint64_t *unflushed_transaction); void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, const struct ovsdb_table *table, From 1158f320622954f4027a35260916f6a950529c27 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 13 Mar 2015 16:35:49 -0700 Subject: [PATCH 086/146] ovsdb-monitor: add ovsdb_monitor_changes Currently, each monitor table contains a single hmap 'changes' to track updates. This patch introduces a new data structure 'ovsdb_monitor_changes' that stores the updates 'rows' tagged by its first commit transaction id. Each 'ovsdb_monitor_changes' is reference counted allowing multiple jsonrpc_monitors to share them. The next patch will allow each ovsdb monitor table to store a list of 'ovsdb_monitor_changes'. This patch stores only one, same as before. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 7 +-- ovsdb/monitor.c | 130 +++++++++++++++++++++++++++++++++++------ 2 files changed, 114 insertions(+), 23 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 05aaf8718..efd83b8f0 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1282,11 +1282,10 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) } static struct json * -ovsdb_jsonrpc_monitor_compose_update( - struct ovsdb_jsonrpc_monitor *monitor, bool initial) +ovsdb_jsonrpc_monitor_compose_update(struct ovsdb_jsonrpc_monitor *m, + bool initial) { - return ovsdb_monitor_compose_update(monitor->dbmon, initial, - &monitor->unflushed); + return ovsdb_monitor_compose_update(m->dbmon, initial, &m->unflushed); } static bool diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 0898808b4..b82941b38 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -71,6 +71,23 @@ struct ovsdb_monitor_row { struct ovsdb_datum *new; /* New data, NULL for a deleted row. */ }; +/* Contains 'struct ovsdb_monitor_row's for rows that have been + * updated but not yet flushed to all the jsonrpc connection. + * + * 'n_refs' represent the number of jsonrpc connections that have + * not received updates. Generate the update for the last jsonprc + * connection will also destroy the whole "struct ovsdb_monitor_changes" + * object. + * + * 'transaction' stores the first update's transaction id. + * */ +struct ovsdb_monitor_changes { + struct ovsdb_monitor_table *mt; + struct hmap rows; + int n_refs; + uint64_t transaction; +}; + /* A particular table being monitored. */ struct ovsdb_monitor_table { const struct ovsdb_table *table; @@ -85,10 +102,16 @@ struct ovsdb_monitor_table { /* Contains 'struct ovsdb_monitor_row's for rows that have been * updated but not yet flushed to the jsonrpc connection. 
*/ - struct hmap changes; + struct ovsdb_monitor_changes *changes; }; static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); +static void ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, + uint64_t next_txn); +static void ovsdb_monitor_changes_destroy( + struct ovsdb_monitor_changes *changes); +static void ovsdb_monitor_table_track_changes(struct ovsdb_monitor_table *mt, + uint64_t transaction); static int compare_ovsdb_monitor_column(const void *a_, const void *b_) @@ -106,7 +129,7 @@ ovsdb_monitor_cast(struct ovsdb_replica *replica) return CONTAINER_OF(replica, struct ovsdb_monitor, replica); } -/* Finds and returns the ovsdb_monitor_row in 'mt->changes' for the +/* Finds and returns the ovsdb_monitor_row in 'mt->changes->rows' for the * given 'uuid', or NULL if there is no such row. */ static struct ovsdb_monitor_row * ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, @@ -114,7 +137,8 @@ ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, { struct ovsdb_monitor_row *row; - HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &mt->changes) { + HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), + &mt->changes->rows) { if (uuid_equals(uuid, &row->uuid)) { return row; } @@ -233,7 +257,7 @@ ovsdb_monitor_add_table(struct ovsdb_monitor *m, mt = xzalloc(sizeof *mt); mt->table = table; - hmap_init(&mt->changes); + mt->changes = NULL; shash_add(&m->tables, table->schema->name, mt); } @@ -286,6 +310,61 @@ ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *m, return NULL; } +static void +ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, + uint64_t next_txn) +{ + struct ovsdb_monitor_changes *changes; + + changes = xzalloc(sizeof *changes); + + changes->transaction = next_txn; + changes->mt = mt; + changes->n_refs = 1; + hmap_init(&changes->rows); + mt->changes = changes; +} + +/* Stop currently tracking changes to table 'mt' since 'transaction'. 
+ * + * Return 'true' if the 'transaction' is being tracked. 'false' otherwise. */ +static void +ovsdb_monitor_table_untrack_changes(struct ovsdb_monitor_table *mt, + uint64_t transaction) +{ + struct ovsdb_monitor_changes *changes = mt->changes; + if (changes) { + ovs_assert(changes->transaction == transaction); + if (--changes->n_refs == 0) { + ovsdb_monitor_changes_destroy(changes); + mt->changes = NULL; + } + } +} + +/* Start tracking changes to table 'mt' begins from 'transaction' inclusive. + */ +static void +ovsdb_monitor_table_track_changes(struct ovsdb_monitor_table *mt, + uint64_t transaction) +{ + ovs_assert(!mt->changes); + ovsdb_monitor_table_add_changes(mt, transaction); +} + +static void +ovsdb_monitor_changes_destroy(struct ovsdb_monitor_changes *changes) +{ + struct ovsdb_monitor_row *row, *next; + + HMAP_FOR_EACH_SAFE (row, next, hmap_node, &changes->rows) { + hmap_remove(&changes->rows, &row->hmap_node); + ovsdb_monitor_row_destroy(changes->mt, row); + } + hmap_destroy(&changes->rows); + free(changes); +} + /* Returns JSON for a (as described in RFC 7047) for 'row' within * 'mt', or NULL if no row update should be sent. * @@ -376,7 +455,13 @@ ovsdb_monitor_compose_row_update( * * The caller should specify 'initial' as true if the returned JSON is going to * be used as part of the initial reply to a "monitor" request, false if it is - * going to be used as part of an "update" notification. */ + * going to be used as part of an "update" notification. + * + * 'unflushed' should point to value that is the transaction ID that did + * was not updated. The update contains changes between + * ['unflushed, ovsdb->n_transcations]. Before the function returns, this + * value will be updated to ovsdb->n_transactions + 1, ready for the next + * update. 
*/ struct json * ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, bool initial, uint64_t *unflushed) @@ -385,8 +470,8 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, unsigned long int *changed; struct json *json; size_t max_columns; - - *unflushed = dbmon->n_transactions + 1; + uint64_t prev_txn = *unflushed; + uint64_t next_txn = dbmon->n_transactions + 1; max_columns = 0; SHASH_FOR_EACH (node, &dbmon->tables) { @@ -402,7 +487,12 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, struct ovsdb_monitor_row *row, *next; struct json *table_json = NULL; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { + if (!mt->changes) { + ovsdb_monitor_table_track_changes(mt, next_txn); + continue; + } + + HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes->rows) { struct json *row_json; row_json = ovsdb_monitor_compose_row_update( @@ -426,11 +516,15 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, json_object_put(table_json, uuid, row_json); } - hmap_remove(&mt->changes, &row->hmap_node); + hmap_remove(&mt->changes->rows, &row->hmap_node); ovsdb_monitor_row_destroy(mt, row); } + + ovsdb_monitor_table_untrack_changes(mt, prev_txn); + ovsdb_monitor_table_track_changes(mt, next_txn); } + *unflushed = next_txn; free(changed); return json; } @@ -492,8 +586,8 @@ ovsdb_monitor_change_cb(const struct ovsdb_row *old, change = ovsdb_monitor_row_find(mt, uuid); if (!change) { - change = xmalloc(sizeof *change); - hmap_insert(&mt->changes, &change->hmap_node, uuid_hash(uuid)); + change = xzalloc(sizeof *change); + hmap_insert(&mt->changes->rows, &change->hmap_node, uuid_hash(uuid)); change->uuid = *uuid; change->old = clone_monitor_row_data(mt, old); change->new = clone_monitor_row_data(mt, new); @@ -506,7 +600,7 @@ ovsdb_monitor_change_cb(const struct ovsdb_row *old, if (!change->old) { /* This row was added then deleted. Forget about it. 
*/ - hmap_remove(&mt->changes, &change->hmap_node); + hmap_remove(&mt->changes->rows, &change->hmap_node); free(change); } } @@ -527,6 +621,10 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) if (mt->select & OJMS_INITIAL) { struct ovsdb_row *row; + if (!mt->changes) { + ovsdb_monitor_table_add_changes(mt, 0); + } + HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { ovsdb_monitor_change_cb(NULL, row, NULL, &aux); } @@ -568,14 +666,8 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; - struct ovsdb_monitor_row *row, *next; - - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes) { - hmap_remove(&mt->changes, &row->hmap_node); - ovsdb_monitor_row_destroy(mt, row); - } - hmap_destroy(&mt->changes); + ovsdb_monitor_changes_destroy(mt->changes); free(mt->columns); free(mt); } From 7e911055e657fa2a2af52f9fa3e7545055fa1f09 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Sat, 14 Mar 2015 00:40:18 -0700 Subject: [PATCH 087/146] ovsdb-monitor: allow multiple of 'ovsdb_monitor_changes' in each ovsdb monitor table Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/monitor.c | 126 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 40 deletions(-) diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index b82941b38..9541f3e76 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -28,6 +28,7 @@ #include "ovsdb.h" #include "row.h" #include "simap.h" +#include "hash.h" #include "table.h" #include "timeval.h" #include "transaction.h" @@ -86,6 +87,8 @@ struct ovsdb_monitor_changes { struct hmap rows; int n_refs; uint64_t transaction; + struct hmap_node hmap_node; /* Element in ovsdb_monitor_tables' changes + hmap. */ }; /* A particular table being monitored. 
*/ @@ -100,18 +103,19 @@ struct ovsdb_monitor_table { struct ovsdb_monitor_column *columns; size_t n_columns; - /* Contains 'struct ovsdb_monitor_row's for rows that have been - * updated but not yet flushed to the jsonrpc connection. */ - struct ovsdb_monitor_changes *changes; + /* Contains 'ovsdb_monitor_changes' indexed by 'transaction'. */ + struct hmap changes; }; static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); static void ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, uint64_t next_txn); +static struct ovsdb_monitor_changes *ovsdb_monitor_table_find_changes( + struct ovsdb_monitor_table *mt, uint64_t unflushed); static void ovsdb_monitor_changes_destroy( struct ovsdb_monitor_changes *changes); static void ovsdb_monitor_table_track_changes(struct ovsdb_monitor_table *mt, - uint64_t transaction); + uint64_t unflushed); static int compare_ovsdb_monitor_column(const void *a_, const void *b_) @@ -132,13 +136,13 @@ ovsdb_monitor_cast(struct ovsdb_replica *replica) /* Finds and returns the ovsdb_monitor_row in 'mt->changes->rows' for the * given 'uuid', or NULL if there is no such row. 
*/ static struct ovsdb_monitor_row * -ovsdb_monitor_row_find(const struct ovsdb_monitor_table *mt, - const struct uuid *uuid) +ovsdb_monitor_changes_row_find(const struct ovsdb_monitor_changes *changes, + const struct uuid *uuid) { struct ovsdb_monitor_row *row; HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), - &mt->changes->rows) { + &changes->rows) { if (uuid_equals(uuid, &row->uuid)) { return row; } @@ -257,8 +261,8 @@ ovsdb_monitor_add_table(struct ovsdb_monitor *m, mt = xzalloc(sizeof *mt); mt->table = table; - mt->changes = NULL; shash_add(&m->tables, table->schema->name, mt); + hmap_init(&mt->changes); } void @@ -322,7 +326,23 @@ ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, changes->mt = mt; changes->n_refs = 1; hmap_init(&changes->rows); - mt->changes = changes; + hmap_insert(&mt->changes, &changes->hmap_node, hash_uint64(next_txn)); +}; + +static struct ovsdb_monitor_changes * +ovsdb_monitor_table_find_changes(struct ovsdb_monitor_table *mt, + uint64_t transaction) +{ + struct ovsdb_monitor_changes *changes; + size_t hash = hash_uint64(transaction); + + HMAP_FOR_EACH_WITH_HASH(changes, hmap_node, hash, &mt->changes) { + if (changes->transaction == transaction) { + return changes; + } + } + + return NULL; } /* Stop currently tracking changes to table 'mt' since 'transaction'. 
@@ -332,12 +352,13 @@ static void ovsdb_monitor_table_untrack_changes(struct ovsdb_monitor_table *mt, uint64_t transaction) { - struct ovsdb_monitor_changes *changes = mt->changes; + struct ovsdb_monitor_changes *changes = + ovsdb_monitor_table_find_changes(mt, transaction); if (changes) { ovs_assert(changes->transaction == transaction); if (--changes->n_refs == 0) { + hmap_remove(&mt->changes, &changes->hmap_node); ovsdb_monitor_changes_destroy(changes); - mt->changes = NULL; } } } @@ -348,8 +369,14 @@ static void ovsdb_monitor_table_track_changes(struct ovsdb_monitor_table *mt, uint64_t transaction) { - ovs_assert(!mt->changes); - ovsdb_monitor_table_add_changes(mt, transaction); + struct ovsdb_monitor_changes *changes; + + changes = ovsdb_monitor_table_find_changes(mt, transaction); + if (changes) { + changes->n_refs++; + } else { + ovsdb_monitor_table_add_changes(mt, transaction); + } } static void @@ -485,14 +512,16 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; struct ovsdb_monitor_row *row, *next; + struct ovsdb_monitor_changes *changes; struct json *table_json = NULL; - if (!mt->changes) { + changes = ovsdb_monitor_table_find_changes(mt, prev_txn); + if (!changes) { ovsdb_monitor_table_track_changes(mt, next_txn); continue; } - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mt->changes->rows) { + HMAP_FOR_EACH_SAFE (row, next, hmap_node, &changes->rows) { struct json *row_json; row_json = ovsdb_monitor_compose_row_update( @@ -516,7 +545,7 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, json_object_put(table_json, uuid, row_json); } - hmap_remove(&mt->changes->rows, &row->hmap_node); + hmap_remove(&changes->rows, &row->hmap_node); ovsdb_monitor_row_destroy(mt, row); } @@ -561,33 +590,19 @@ ovsdb_monitor_init_aux(struct ovsdb_monitor_aux *aux, aux->mt = NULL; } -static bool -ovsdb_monitor_change_cb(const struct ovsdb_row *old, - const struct 
ovsdb_row *new, - const unsigned long int *changed OVS_UNUSED, - void *aux_) +static void +ovsdb_monitor_changes_update(const struct ovsdb_row *old, + const struct ovsdb_row *new, + const struct ovsdb_monitor_table *mt, + struct ovsdb_monitor_changes *changes) { - struct ovsdb_monitor_aux *aux = aux_; - const struct ovsdb_monitor *m = aux->monitor; - struct ovsdb_table *table = new ? new->table : old->table; const struct uuid *uuid = ovsdb_row_get_uuid(new ? new : old); struct ovsdb_monitor_row *change; - struct ovsdb_monitor_table *mt; - if (!aux->mt || table != aux->mt->table) { - aux->mt = shash_find_data(&m->tables, table->schema->name); - if (!aux->mt) { - /* We don't care about rows in this table at all. Tell the caller - * to skip it. */ - return false; - } - } - mt = aux->mt; - - change = ovsdb_monitor_row_find(mt, uuid); + change = ovsdb_monitor_changes_row_find(changes, uuid); if (!change) { change = xzalloc(sizeof *change); - hmap_insert(&mt->changes->rows, &change->hmap_node, uuid_hash(uuid)); + hmap_insert(&changes->rows, &change->hmap_node, uuid_hash(uuid)); change->uuid = *uuid; change->old = clone_monitor_row_data(mt, old); change->new = clone_monitor_row_data(mt, new); @@ -600,11 +615,38 @@ ovsdb_monitor_change_cb(const struct ovsdb_row *old, if (!change->old) { /* This row was added then deleted. Forget about it. */ - hmap_remove(&mt->changes->rows, &change->hmap_node); + hmap_remove(&changes->rows, &change->hmap_node); free(change); } } } +} + +static bool +ovsdb_monitor_change_cb(const struct ovsdb_row *old, + const struct ovsdb_row *new, + const unsigned long int *changed OVS_UNUSED, + void *aux_) +{ + struct ovsdb_monitor_aux *aux = aux_; + const struct ovsdb_monitor *m = aux->monitor; + struct ovsdb_table *table = new ? 
new->table : old->table; + struct ovsdb_monitor_table *mt; + struct ovsdb_monitor_changes *changes; + + if (!aux->mt || table != aux->mt->table) { + aux->mt = shash_find_data(&m->tables, table->schema->name); + if (!aux->mt) { + /* We don't care about rows in this table at all. Tell the caller + * to skip it. */ + return false; + } + } + mt = aux->mt; + + HMAP_FOR_EACH(changes, hmap_node, &mt->changes) { + ovsdb_monitor_changes_update(old, new, mt, changes); + } return true; } @@ -621,7 +663,7 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) if (mt->select & OJMS_INITIAL) { struct ovsdb_row *row; - if (!mt->changes) { + if (hmap_is_empty(&mt->changes)) { ovsdb_monitor_table_add_changes(mt, 0); } @@ -666,8 +708,12 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; + struct ovsdb_monitor_changes *changes, *next; - ovsdb_monitor_changes_destroy(mt->changes); + HMAP_FOR_EACH_SAFE (changes, next, hmap_node, &mt->changes) { + hmap_remove(&mt->changes, &changes->hmap_node); + ovsdb_monitor_changes_destroy(changes); + } free(mt->columns); free(mt); } From 8f12b27a59ff2ffaf16159de7ae38cb6bf4e43a5 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 088/146] ofproto: Eliminate use of unset error code. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index e06273276..5c3b49773 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -4372,8 +4372,7 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule = ofproto->ofproto_class->rule_alloc(); if (!rule) { cls_rule_destroy(&cr); - VLOG_WARN_RL(&rl, "%s: failed to create rule (%s)", - ofproto->name, ovs_strerror(error)); + VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); return ENOMEM; } From 7b3dca89069d81296e07d29014e7d47f921e669b Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 089/146] ofp-util: Fix xid in ofputil_encode_bundle_add(). Bundle add must use the same xid as the embedded message. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/ofp-util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 6366919a4..e62c584fd 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -8900,7 +8900,9 @@ ofputil_encode_bundle_add(enum ofp_version ofp_version, struct ofpbuf *request; struct ofp14_bundle_ctrl_msg *m; - request = ofpraw_alloc(OFPRAW_OFPT14_BUNDLE_ADD_MESSAGE, ofp_version, 0); + /* Must use the same xid as the embedded message. */ + request = ofpraw_alloc_xid(OFPRAW_OFPT14_BUNDLE_ADD_MESSAGE, ofp_version, + msg->msg->xid, 0); m = ofpbuf_put_zeros(request, sizeof *m); m->bundle_id = htonl(msg->bundle_id); From fc02ecc7171009453f71de572d7874d385266aed Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 090/146] classifier: Add support for invisible flows. This makes it possible to tentatively add flows to the classifier without the datapath seeing them. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/classifier-private.h | 3 +- lib/classifier.c | 67 ++++++++++++++++++++++++++++++++-------- lib/classifier.h | 2 +- ofproto/ofproto.c | 1 + utilities/ovs-ofctl.c | 1 + 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/lib/classifier-private.h b/lib/classifier-private.h index 4eed9e42f..a7edbe93b 100644 --- a/lib/classifier-private.h +++ b/lib/classifier-private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Nicira, Inc. + * Copyright (c) 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,6 +79,7 @@ struct cls_match { * 'indices'. */ /* Accessed by all readers. */ struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */ + bool visible; const struct cls_rule *cls_rule; OVSRCU_TYPE(struct cls_conjunction_set *) conj_set; const struct miniflow flow; /* Matching rule. Mask is in the subtable. */ diff --git a/lib/classifier.c b/lib/classifier.c index 68a34437a..5344ca55c 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -99,6 +99,7 @@ cls_match_alloc(const struct cls_rule *rule, rculist_init(&cls_match->list); *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule; *CONST_CAST(int *, &cls_match->priority) = rule->priority; + cls_match->visible = false; miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow), &rule->match.flow, count); ovsrcu_set_hidden(&cls_match->conj_set, @@ -136,6 +137,19 @@ next_rule_in_list(const struct cls_match *rule) return next->priority < rule->priority ? next : NULL; } +/* Return the next lower-priority rule in the list that is visible. 
*/ +static inline const struct cls_match * +next_visible_rule_in_list(const struct cls_match *rule) +{ + const struct cls_match *next = rule; + + do { + next = next_rule_in_list(next); + } while (next && !next->visible); + + return next; +} + static inline struct cls_match * next_rule_in_list_protected__(struct cls_match *rule) { @@ -301,6 +315,16 @@ cls_rule_is_catchall(const struct cls_rule *rule) { return minimask_is_catchall(&rule->match.mask); } + +/* Rules inserted during classifier_defer() need to be made visible before + * calling classifier_publish(). + * + * 'rule' must be in a classifier. */ +void cls_rule_make_visible(const struct cls_rule *rule) +{ + rule->cls_match->visible = true; +} + /* Initializes 'cls' as a classifier that initially contains no classification * rules. */ @@ -623,8 +647,6 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, new->partition = create_partition(cls, subtable, metadata); } - /* Make rule visible to lookups. */ - /* Add new node to segment indices. * * Readers may find the rule in the indices before the rule is visible @@ -680,7 +702,10 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, /* No change in subtable's max priority or max count. */ - /* Make rule visible to iterators. */ + /* Make rule visible to lookups? */ + new->visible = cls->publish; + + /* Make rule visible to iterators (immediately). */ rculist_replace(CONST_CAST(struct rculist *, &rule->node), &old->node); @@ -693,7 +718,10 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, } } - /* Make rule visible to iterators. */ + /* Make rule visible to lookups? */ + new->visible = cls->publish; + + /* Make rule visible to iterators (immediately). */ rculist_push_back(&subtable->rules_list, CONST_CAST(struct rculist *, &rule->node)); @@ -1200,7 +1228,7 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, } /* Find next-lower-priority flow with identical flow match. 
*/ - match = next_rule_in_list(soft[i]->match); + match = next_visible_rule_in_list(soft[i]->match); if (match) { soft[i] = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set); @@ -1664,12 +1692,18 @@ static inline const struct cls_match * find_match(const struct cls_subtable *subtable, const struct flow *flow, uint32_t hash) { - const struct cls_match *rule; + const struct cls_match *head, *rule; - CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { - if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, - flow)) { - return rule; + CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) { + if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow, + &subtable->mask, + flow))) { + /* Return highest priority rule that is visible. */ + FOR_EACH_RULE_IN_LIST(rule, head) { + if (OVS_LIKELY(rule->visible)) { + return rule; + } + } } } @@ -1768,10 +1802,17 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, * (Rare) hash collisions may cause us to miss the opportunity for this * optimization. */ if (!cmap_node_next(inode)) { - ASSIGN_CONTAINER(rule, inode - i, index_nodes); - if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask, + const struct cls_match *head; + + ASSIGN_CONTAINER(head, inode - i, index_nodes); + if (miniflow_and_mask_matches_flow_wc(&head->flow, &subtable->mask, flow, wc)) { - return rule; + /* Return highest priority rule that is visible. 
*/ + FOR_EACH_RULE_IN_LIST(rule, head) { + if (OVS_LIKELY(rule->visible)) { + return rule; + } + } } return NULL; } diff --git a/lib/classifier.h b/lib/classifier.h index f9af33e32..c38d92200 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -285,6 +285,7 @@ void cls_rule_format(const struct cls_rule *, struct ds *); bool cls_rule_is_catchall(const struct cls_rule *); bool cls_rule_is_loose_match(const struct cls_rule *rule, const struct minimatch *criteria); +void cls_rule_make_visible(const struct cls_rule *rule); /* Constructor/destructor. Must run single-threaded. */ void classifier_init(struct classifier *, const uint8_t *flow_segments); @@ -358,7 +359,6 @@ void cls_cursor_advance(struct cls_cursor *); #ifdef __cplusplus } #endif - static inline void classifier_defer(struct classifier *cls) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 5c3b49773..9c4e97d27 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -4431,6 +4431,7 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, ofproto_rule_unref(rule); return error; } + cls_rule_make_visible(&rule->cr); classifier_publish(&table->cls); learned_cookies_inc(ofproto, actions); diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 3d61c4b8a..54a5bb8d0 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -2403,6 +2403,7 @@ fte_insert(struct classifier *cls, const struct match *match, ovsrcu_postpone(fte_free, old); } + cls_rule_make_visible(&fte->rule); } /* Reads the flows in 'filename' as flow table entries in 'cls' for the version From 186120da7fc0f20d93778623a006c152f7cd5e90 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 091/146] classifier: Support duplicate rules. OpenFlow 1.4 bundles are easier to implement when it is possible to mark a rule as 'to_be_removed' and then insert a new, identical rule with the same priority. 
All but one out of the identical rules must be marked as 'to_be_removed', and the one rule that is not 'to_be_removed' must have been inserted last. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/classifier.c | 145 +++++++++++++++++++++++++++++------------------ lib/classifier.h | 51 ++++++++++++++++- 2 files changed, 137 insertions(+), 59 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 5344ca55c..6075cf785 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -131,21 +131,30 @@ next_rule_in_list__(const struct cls_match *rule) } static inline const struct cls_match * -next_rule_in_list(const struct cls_match *rule) +next_rule_in_list(const struct cls_match *rule, const struct cls_match *head) { const struct cls_match *next = next_rule_in_list__(rule); - return next->priority < rule->priority ? next : NULL; + return next != head ? next : NULL; } -/* Return the next lower-priority rule in the list that is visible. */ +/* Return the next lower-priority rule in the list that is visible. Multiple + * identical rules with the same priority may exist transitionally. In that + * case the first rule of a given priority has been marked as 'to_be_removed', + * and the later rules are marked as '!visible'. This gets a bit complex if + * there are two rules of the same priority in the list, as in that case the + * head and tail of the list will have the same priority. */ static inline const struct cls_match * next_visible_rule_in_list(const struct cls_match *rule) { const struct cls_match *next = rule; do { - next = next_rule_in_list(next); - } while (next && !next->visible); + next = next_rule_in_list__(next); + if (next->priority > rule->priority || next == rule) { + /* We have reached the head of the list, stop. 
*/ + return NULL; + } + } while (!next->visible); return next; } @@ -159,18 +168,19 @@ next_rule_in_list_protected__(struct cls_match *rule) } static inline struct cls_match * -next_rule_in_list_protected(struct cls_match *rule) +next_rule_in_list_protected(struct cls_match *rule, struct cls_match *head) { struct cls_match *next = next_rule_in_list_protected__(rule); - return next->priority < rule->priority ? next : NULL; + return next != head ? next : NULL; } /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ -#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ - for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) -#define FOR_EACH_RULE_IN_LIST_PROTECTED(RULE, HEAD) \ - for ((RULE) = (HEAD); (RULE) != NULL; \ - (RULE) = next_rule_in_list_protected(RULE)) +#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ + for ((RULE) = (HEAD); (RULE) != NULL; \ + (RULE) = next_rule_in_list(RULE, HEAD)) +#define FOR_EACH_RULE_IN_LIST_PROTECTED(RULE, HEAD) \ + for ((RULE) = (HEAD); (RULE) != NULL; \ + (RULE) = next_rule_in_list_protected(RULE, HEAD)) static unsigned int minimask_get_prefix_len(const struct minimask *, const struct mf_field *); @@ -200,6 +210,7 @@ cls_rule_init__(struct cls_rule *rule, unsigned int priority) { rculist_init(&rule->node); rule->priority = priority; + rule->to_be_removed = false; rule->cls_match = NULL; } @@ -662,14 +673,17 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, struct cls_match *iter; /* Scan the list for the insertion point that will keep the list in - * order of decreasing priority. */ + * order of decreasing priority. + * Insert after 'to_be_removed' rules of the same priority. */ FOR_EACH_RULE_IN_LIST_PROTECTED (iter, head) { - if (rule->priority >= iter->priority) { + if (rule->priority > iter->priority + || (rule->priority == iter->priority + && !iter->cls_rule->to_be_removed)) { break; } } - /* 'iter' now at the insertion point or NULL it at end. 
*/ + /* 'iter' now at the insertion point or NULL if at end. */ if (iter) { struct cls_rule *old; @@ -777,57 +791,62 @@ classifier_insert(struct classifier *cls, const struct cls_rule *rule, * Returns the removed rule, or NULL, if it was already removed. */ const struct cls_rule * -classifier_remove(struct classifier *cls, const struct cls_rule *rule) +classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule) { + struct cls_match *rule, *prev, *next; struct cls_partition *partition; - struct cls_match *cls_match; struct cls_conjunction_set *conj_set; struct cls_subtable *subtable; - struct cls_match *prev; - struct cls_match *next; int i; uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES]; uint8_t prev_be64ofs = 0; size_t n_rules; - cls_match = rule->cls_match; - if (!cls_match) { + rule = cls_rule->cls_match; + if (!rule) { return NULL; } /* Mark as removed. */ - CONST_CAST(struct cls_rule *, rule)->cls_match = NULL; + CONST_CAST(struct cls_rule *, cls_rule)->cls_match = NULL; - /* Remove 'rule' from the subtable's rules list. */ - rculist_remove(CONST_CAST(struct rculist *, &rule->node)); + /* Remove 'cls_rule' from the subtable's rules list. */ + rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node)); - INIT_CONTAINER(prev, rculist_back_protected(&cls_match->list), list); - INIT_CONTAINER(next, rculist_next(&cls_match->list), list); + INIT_CONTAINER(prev, rculist_back_protected(&rule->list), list); + INIT_CONTAINER(next, rculist_next(&rule->list), list); /* Remove from the list of equal rules. */ - rculist_remove(&cls_match->list); + rculist_remove(&rule->list); - /* Check if this is NOT a head rule. */ + /* Cheap check for a non-head rule. */ if (prev->priority > rule->priority) { /* Not the highest priority rule, no need to check subtable's * 'max_priority'. 
*/ goto free; } - subtable = find_subtable(cls, &rule->match.mask); + subtable = find_subtable(cls, &cls_rule->match.mask); ovs_assert(subtable); for (i = 0; i < subtable->n_indices; i++) { - ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs, + ihash[i] = minimatch_hash_range(&cls_rule->match, prev_be64ofs, subtable->index_ofs[i], &basis); prev_be64ofs = subtable->index_ofs[i]; } - hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis); + hash = minimatch_hash_range(&cls_rule->match, prev_be64ofs, FLOW_U64S, + &basis); - /* Head rule. Check if 'next' is an identical, lower-priority rule that - * will replace 'rule' in the data structures. */ - if (next->priority < rule->priority) { - subtable_replace_head_rule(cls, subtable, cls_match, next, hash, - ihash); + /* Check if the rule is not the head rule. */ + if (rule != prev && + rule != find_equal(subtable, &cls_rule->match.flow, hash)) { + /* Not the head rule, but potentially one with the same priority. */ + goto check_priority; + } + + /* 'rule' is the head rule. Check if there is another rule to + * replace 'rule' in the data structures. */ + if (next != rule) { + subtable_replace_head_rule(cls, subtable, rule, next, hash, ihash); goto check_priority; } @@ -835,25 +854,24 @@ classifier_remove(struct classifier *cls, const struct cls_rule *rule) * data structures. */ if (subtable->ports_mask_len) { - ovs_be32 masked_ports = minimatch_get_ports(&rule->match); + ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match); trie_remove_prefix(&subtable->ports_trie, &masked_ports, subtable->ports_mask_len); } for (i = 0; i < cls->n_tries; i++) { if (subtable->trie_plen[i]) { - trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]); + trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]); } } /* Remove rule node from indices. 
*/ for (i = 0; i < subtable->n_indices; i++) { - cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i], - ihash[i]); + cmap_remove(&subtable->indices[i], &rule->index_nodes[i], ihash[i]); } - n_rules = cmap_remove(&subtable->rules, &cls_match->cmap_node, hash); + n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash); - partition = cls_match->partition; + partition = rule->partition; if (partition) { tag_tracker_subtract(&partition->tracker, &partition->tags, subtable->tag); @@ -871,8 +889,8 @@ check_priority: if (subtable->max_priority == rule->priority && --subtable->max_count == 0) { /* Find the new 'max_priority' and 'max_count'. */ - struct cls_match *head; int max_priority = INT_MIN; + struct cls_match *head; CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { if (head->priority > max_priority) { @@ -893,14 +911,14 @@ check_priority: free: conj_set = ovsrcu_get_protected(struct cls_conjunction_set *, - &cls_match->conj_set); + &rule->conj_set); if (conj_set) { ovsrcu_postpone(free, conj_set); } - ovsrcu_postpone(free, cls_match); + ovsrcu_postpone(free, rule); cls->n_rules--; - return rule; + return cls_rule; } /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all @@ -1273,7 +1291,10 @@ classifier_lookup(const struct classifier *cls, struct flow *flow, /* Finds and returns a rule in 'cls' with exactly the same priority and * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't - * contain an exact match. */ + * contain an exact match. + * + * Returns the first matching rule that is not 'to_be_removed'. Only one such + * rule may exist. */ const struct cls_rule * classifier_find_rule_exactly(const struct classifier *cls, const struct cls_rule *target) @@ -1293,8 +1314,12 @@ classifier_find_rule_exactly(const struct classifier *cls, return NULL; } FOR_EACH_RULE_IN_LIST (rule, head) { - if (target->priority >= rule->priority) { - return target->priority == rule->priority ? 
rule->cls_rule : NULL; + if (rule->priority < target->priority) { + break; /* Not found. */ + } + if (rule->priority == target->priority + && !rule->cls_rule->to_be_removed) { + return rule->cls_rule; } } return NULL; @@ -1324,7 +1349,11 @@ classifier_find_match_exactly(const struct classifier *cls, * A trivial example of overlapping rules is two rules matching disjoint sets * of fields. E.g., if one rule matches only on port number, while another only * on dl_type, any packet from that specific port and with that specific - * dl_type could match both, if the rules also have the same priority. */ + * dl_type could match both, if the rules also have the same priority. + * + * 'target' is not considered to overlap with a rule that has been marked + * as 'to_be_removed'. + */ bool classifier_rule_overlaps(const struct classifier *cls, const struct cls_rule *target) @@ -1342,6 +1371,7 @@ classifier_rule_overlaps(const struct classifier *cls, RCULIST_FOR_EACH (rule, node, &subtable->rules_list) { if (rule->priority == target->priority + && !rule->to_be_removed && miniflow_equal_in_minimask(&target->match.flow, &rule->match.flow, &mask)) { return true; @@ -1398,10 +1428,13 @@ cls_rule_is_loose_match(const struct cls_rule *rule, static bool rule_matches(const struct cls_rule *rule, const struct cls_rule *target) { - return (!target - || miniflow_equal_in_minimask(&rule->match.flow, - &target->match.flow, - &target->match.mask)); + /* Iterators never see rules that have been marked for removal. + * This allows them to be oblivious of duplicate rules. */ + return (!rule->to_be_removed && + (!target + || miniflow_equal_in_minimask(&rule->match.flow, + &target->match.flow, + &target->match.mask))); } static const struct cls_rule * @@ -1430,8 +1463,8 @@ search_subtable(const struct cls_subtable *subtable, * such that cls_rule_is_loose_match(rule, target) returns true. * * Ignores target->priority. 
*/ -struct cls_cursor cls_cursor_start(const struct classifier *cls, - const struct cls_rule *target) +struct cls_cursor +cls_cursor_start(const struct classifier *cls, const struct cls_rule *target) { struct cls_cursor cursor; struct cls_subtable *subtable; diff --git a/lib/classifier.h b/lib/classifier.h index c38d92200..d69c20191 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -126,9 +126,12 @@ * cls_subtable", with the other almost-identical rules chained off a linked * list inside that highest-priority rule. * + * The following sub-sections describe various optimizations over this simple + * approach. + * * * Staged Lookup (Wildcard Optimization) - * ===================================== + * ------------------------------------- * * Subtable lookup is performed in ranges defined for struct flow, starting * from metadata (registers, in_port, etc.), then L2 header, L3, and finally @@ -141,7 +144,7 @@ * * * Prefix Tracking (Wildcard Optimization) - * ======================================= + * --------------------------------------- * * Classifier uses prefix trees ("tries") for tracking the used * address space, enabling skipping classifier tables containing @@ -171,7 +174,7 @@ * * * Partitioning (Lookup Time and Wildcard Optimization) - * ==================================================== + * ---------------------------------------------------- * * Suppose that a given classifier is being used to handle multiple stages in a * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field @@ -207,6 +210,41 @@ * Each eliminated subtable lookup also reduces the amount of un-wildcarding. * * + * Tentative Modifications + * ======================= + * + * When a new rule is added to a classifier, it can optionally be "invisible". + * That means that lookups won't find the rule, although iterations through + * the classifier will see it. 
+ * + * Similarly, deletions from a classifier can be "tentative", by setting + * 'to_be_removed' to true within the rule. A rule that is tentatively deleted + * will not appear in iterations, although it will still be found by lookups. + * + * Classifiers can hold duplicate rules (rules with the same match criteria and + * priority) when tentative modifications are involved: one (or more) identical + * tentatively deleted rules can coexist in a classifier with at most one + * identical invisible rule. + * + * The classifier supports tentative modifications for two reasons: + * + * 1. Performance: Adding (or deleting) a rule can, in pathological cases, + * have a cost proportional to the number of rules already in the + * classifier. When multiple rules are being added (or deleted) in one + * go, though, this cost can be paid just once, not once per addition + * (or deletion), as long as it is OK for any new rules to be invisible + * until the batch change is complete. + * + * 2. Staging additions and deletions: Invisibility allows a rule to be + * added tentatively, to possibly be modified or removed before it + * becomes visible. Tentative deletion allows a rule to be scheduled + * for deletion before it is certain that the deletion is desirable. + * + * To use deferred publication, first call classifier_defer(). Then, modify + * the classifier via additions and deletions. Call cls_rule_make_visible() on + * each new rule at an appropriate time. Finally, call classifier_publish(). + * + * * Thread-safety * ============= * @@ -265,6 +303,13 @@ struct cls_conjunction { struct cls_rule { struct rculist node; /* In struct cls_subtable 'rules_list'. */ int priority; /* Larger numbers are higher priorities. */ + bool to_be_removed; /* Rule will be deleted. + * This is the only field that may be + * modified after the rule has been added to + * a classifier. Modifications are to be + * done only under same locking as all other + * classifier modifications.
This field may + * not be examined by lookups. */ struct cls_match *cls_match; /* NULL if not in a classifier. */ struct minimatch match; /* Matching rule. */ }; From 7ac27a044457b560836f6dc9c11443ba7e02045d Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 1 Jun 2015 18:07:39 -0700 Subject: [PATCH 092/146] bundles: Validate bundled messages. OpenFlow bundle messages should be decoded and validated at the time they are added to the bundle. This commit does this for flow mod and port mod messages. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/ofp-print.c | 4 ++-- lib/ofp-util.c | 10 +++++++--- lib/ofp-util.h | 7 +++++-- ofproto/bundles.c | 36 ++++++++++++++-------------------- ofproto/bundles.h | 49 ++++++++++++++++++++++++++++++++++++++--------- ofproto/ofproto.c | 41 +++++++++++++++++++++++++++++++++++++-- 6 files changed, 107 insertions(+), 40 deletions(-) diff --git a/lib/ofp-print.c b/lib/ofp-print.c index cec074f1e..d773dca4f 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -2657,7 +2657,7 @@ ofp_print_bundle_add(struct ds *s, const struct ofp_header *oh, int verbosity) struct ofputil_bundle_add_msg badd; char *msg; - error = ofputil_decode_bundle_add(oh, &badd); + error = ofputil_decode_bundle_add(oh, &badd, NULL); if (error) { ofp_print_error(s, error); return; diff --git a/lib/ofp-util.c b/lib/ofp-util.c index e62c584fd..17a0c412a 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -8852,7 +8852,8 @@ ofputil_is_bundlable(enum ofptype type) enum ofperr ofputil_decode_bundle_add(const struct ofp_header *oh, - struct ofputil_bundle_add_msg *msg) + struct ofputil_bundle_add_msg *msg, + enum ofptype *type_ptr) { const struct ofp14_bundle_ctrl_msg *m; struct ofpbuf b; @@ -8879,14 +8880,17 @@ ofputil_decode_bundle_add(const struct ofp_header *oh, } /* Reject unbundlable messages. */ - error = ofptype_decode(&type, msg->msg); + if (!type_ptr) { + type_ptr = &type; + } + error = ofptype_decode(type_ptr, msg->msg); if (error) { VLOG_WARN_RL(&bad_ofmsg_rl, "OFPT14_BUNDLE_ADD_MESSAGE contained " "message is unparsable (%s)", ofperr_get_name(error)); return OFPERR_OFPBFC_MSG_UNSUP; /* 'error' would be confusing. */ } - if (!ofputil_is_bundlable(type)) { + if (!ofputil_is_bundlable(*type_ptr)) { return OFPERR_OFPBFC_MSG_UNSUP; } diff --git a/lib/ofp-util.h b/lib/ofp-util.h index ee3f1bed6..efb5b18f0 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -1109,6 +1109,8 @@ struct ofputil_bundle_add_msg { const struct ofp_header *msg; }; +enum ofptype; + enum ofperr ofputil_decode_bundle_ctrl(const struct ofp_header *, struct ofputil_bundle_ctrl_msg *); @@ -1119,5 +1121,6 @@ struct ofpbuf *ofputil_encode_bundle_add(enum ofp_version ofp_version, struct ofputil_bundle_add_msg *msg); enum ofperr ofputil_decode_bundle_add(const struct ofp_header *, - struct ofputil_bundle_add_msg *); + struct ofputil_bundle_add_msg *, + enum ofptype *type); #endif /* ofp-util.h */ diff --git a/ofproto/bundles.c b/ofproto/bundles.c index c40909150..f6ad6086e 100644 --- a/ofproto/bundles.c +++ b/ofproto/bundles.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2013, 2014 Alexandru Copot , with support from IXIA. * Copyright (c) 2013, 2014 Daniel Baluta - * Copyright (c) 2014 Nicira, Inc. + * Copyright (c) 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,11 +57,6 @@ struct ofp_bundle { struct ovs_list msg_list; }; -struct bundle_message { - struct ofp_header *msg; - struct ovs_list node; /* Element in 'struct ofp_bundles's msg_list */ -}; - static uint32_t bundle_hash(uint32_t id) { @@ -98,20 +93,19 @@ ofp_bundle_create(uint32_t id, uint16_t flags) } static void -ofp_bundle_remove(struct ofconn *ofconn, struct ofp_bundle *item) +ofp_bundle_remove(struct ofconn *ofconn, struct ofp_bundle *bundle) { - struct bundle_message *msg; + struct ofp_bundle_entry *msg; struct hmap *bundles; - LIST_FOR_EACH_POP (msg, node, &item->msg_list) { - free(msg->msg); - free(msg); + LIST_FOR_EACH_POP (msg, node, &bundle->msg_list) { + ofp_bundle_entry_free(msg); } bundles = ofconn_get_bundles(ofconn); - hmap_remove(bundles, &item->node); + hmap_remove(bundles, &bundle->node); - free(item); + free(bundle); } void @@ -187,7 +181,7 @@ ofp_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) struct hmap *bundles; struct ofp_bundle *bundle; enum 
ofperr error = 0; - struct bundle_message *msg; + struct ofp_bundle_entry *msg; bundles = ofconn_get_bundles(ofconn); bundle = ofp_bundle_find(bundles, id); @@ -227,31 +221,29 @@ ofp_bundle_discard(struct ofconn *ofconn, uint32_t id) } enum ofperr -ofp_bundle_add_message(struct ofconn *ofconn, struct ofputil_bundle_add_msg *badd) +ofp_bundle_add_message(struct ofconn *ofconn, uint32_t id, uint16_t flags, + struct ofp_bundle_entry *bmsg) { struct hmap *bundles; struct ofp_bundle *bundle; - struct bundle_message *bmsg; bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, badd->bundle_id); + bundle = ofp_bundle_find(bundles, id); if (!bundle) { - bundle = ofp_bundle_create(badd->bundle_id, badd->flags); + bundle = ofp_bundle_create(id, flags); bundle->state = BS_OPEN; bundles = ofconn_get_bundles(ofconn); - hmap_insert(bundles, &bundle->node, bundle_hash(badd->bundle_id)); + hmap_insert(bundles, &bundle->node, bundle_hash(id)); } else if (bundle->state == BS_CLOSED) { ofp_bundle_remove(ofconn, bundle); return OFPERR_OFPBFC_BUNDLE_CLOSED; - } else if (badd->flags != bundle->flags) { + } else if (flags != bundle->flags) { ofp_bundle_remove(ofconn, bundle); return OFPERR_OFPBFC_BAD_FLAGS; } - bmsg = xmalloc(sizeof *bmsg); - bmsg->msg = xmemdup(badd->msg, ntohs(badd->msg->length)); list_push_back(&bundle->msg_list, &bmsg->node); return 0; } diff --git a/ofproto/bundles.h b/ofproto/bundles.h index 9a6dfa5a1..c8ce5c985 100644 --- a/ofproto/bundles.h +++ b/ofproto/bundles.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2013, 2014 Alexandru Copot , with support from IXIA. * Copyright (c) 2013, 2014 Daniel Baluta + * Copyright (c) 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,27 +21,57 @@ #include -#include "ofp-msgs.h" #include "connmgr.h" +#include "list.h" +#include "ofp-msgs.h" #include "ofp-util.h" #ifdef __cplusplus extern "C" { #endif +struct ofp_bundle_entry { + struct ovs_list node; + ovs_be32 xid; /* For error returns. */ + enum ofptype type; /* OFPTYPE_FLOW_MOD or OFPTYPE_PORT_MOD. */ + union { + struct ofputil_flow_mod fm; /* 'fm.ofpacts' must be malloced. */ + struct ofputil_port_mod pm; + }; +}; -enum ofperr ofp_bundle_open(struct ofconn *ofconn, uint32_t id, uint16_t flags); +static inline struct ofp_bundle_entry *ofp_bundle_entry_alloc( + enum ofptype type, ovs_be32 xid); +static inline void ofp_bundle_entry_free(struct ofp_bundle_entry *); -enum ofperr ofp_bundle_close(struct ofconn *ofconn, uint32_t id, uint16_t flags); +enum ofperr ofp_bundle_open(struct ofconn *, uint32_t id, uint16_t flags); +enum ofperr ofp_bundle_close(struct ofconn *, uint32_t id, uint16_t flags); +enum ofperr ofp_bundle_commit(struct ofconn *, uint32_t id, uint16_t flags); +enum ofperr ofp_bundle_discard(struct ofconn *, uint32_t id); +enum ofperr ofp_bundle_add_message(struct ofconn *, uint32_t id, + uint16_t flags, struct ofp_bundle_entry *); +void ofp_bundle_remove_all(struct ofconn *); + +static inline struct ofp_bundle_entry * +ofp_bundle_entry_alloc(enum ofptype type, ovs_be32 xid) +{ + struct ofp_bundle_entry *entry = xmalloc(sizeof *entry); -enum ofperr ofp_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags); + entry->xid = xid; + entry->type = type; -enum ofperr ofp_bundle_discard(struct ofconn *ofconn, uint32_t id); + return entry; +} -enum ofperr ofp_bundle_add_message(struct ofconn *ofconn, - struct ofputil_bundle_add_msg *badd); - -void ofp_bundle_remove_all(struct ofconn *ofconn); +static inline void ofp_bundle_entry_free(struct ofp_bundle_entry *entry) +{ + if (entry) { + if (entry->type == OFPTYPE_FLOW_MOD) { + free(entry->fm.ofpacts); + } + free(entry); + } +} #ifdef __cplusplus } diff --git 
a/ofproto/ofproto.c b/ofproto/ofproto.c index 9c4e97d27..0a8c82af5 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -6357,20 +6357,57 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) static enum ofperr handle_bundle_add(struct ofconn *ofconn, const struct ofp_header *oh) { + struct ofproto *ofproto = ofconn_get_ofproto(ofconn); enum ofperr error; struct ofputil_bundle_add_msg badd; + struct ofp_bundle_entry *bmsg; + enum ofptype type; error = reject_slave_controller(ofconn); if (error) { return error; } - error = ofputil_decode_bundle_add(oh, &badd); + error = ofputil_decode_bundle_add(oh, &badd, &type); if (error) { return error; } - return ofp_bundle_add_message(ofconn, &badd); + bmsg = ofp_bundle_entry_alloc(type, badd.msg->xid); + + if (type == OFPTYPE_PORT_MOD) { + error = ofputil_decode_port_mod(badd.msg, &bmsg->pm, false); + } else if (type == OFPTYPE_FLOW_MOD) { + struct ofpbuf ofpacts; + uint64_t ofpacts_stub[1024 / 8]; + + ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + error = ofputil_decode_flow_mod(&bmsg->fm, badd.msg, + ofconn_get_protocol(ofconn), + &ofpacts, + u16_to_ofp(ofproto->max_ports), + ofproto->n_tables); + /* Move actions to heap. */ + bmsg->fm.ofpacts = ofpbuf_steal_data(&ofpacts); + + if (!error && bmsg->fm.ofpacts_len) { + error = ofproto_check_ofpacts(ofproto, bmsg->fm.ofpacts, + bmsg->fm.ofpacts_len); + } + } else { + OVS_NOT_REACHED(); + } + + if (!error) { + error = ofp_bundle_add_message(ofconn, badd.bundle_id, badd.flags, + bmsg); + } + + if (error) { + ofp_bundle_entry_free(bmsg); + } + + return error; } static enum ofperr From ff09bc0814e4596fd00ddbb2add6bd7d93b09755 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 093/146] bundles: Manage bundles in connmgr. This will make implementing bundle commit in ofproto.c easier. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/bundles.c | 116 ++++++++++++---------------------------------- ofproto/bundles.h | 19 +++++++- ofproto/connmgr.c | 51 ++++++++++++++++++-- ofproto/connmgr.h | 6 ++- 4 files changed, 98 insertions(+), 94 deletions(-) diff --git a/ofproto/bundles.c b/ofproto/bundles.c index f6ad6086e..ebf8f7ff7 100644 --- a/ofproto/bundles.c +++ b/ofproto/bundles.c @@ -42,41 +42,6 @@ VLOG_DEFINE_THIS_MODULE(bundles); -enum bundle_state { - BS_OPEN, - BS_CLOSED -}; - -struct ofp_bundle { - struct hmap_node node; /* In struct ofconn's "bundles" hmap. */ - uint32_t id; - uint16_t flags; - enum bundle_state state; - - /* List of 'struct bundle_message's */ - struct ovs_list msg_list; -}; - -static uint32_t -bundle_hash(uint32_t id) -{ - return hash_int(id, 0); -} - -static struct ofp_bundle * -ofp_bundle_find(struct hmap *bundles, uint32_t id) -{ - struct ofp_bundle *bundle; - - HMAP_FOR_EACH_IN_BUCKET(bundle, node, bundle_hash(id), bundles) { - if (bundle->id == id) { - return bundle; - } - } - - return NULL; -} - static struct ofp_bundle * ofp_bundle_create(uint32_t id, uint16_t flags) { @@ -86,88 +51,68 @@ ofp_bundle_create(uint32_t id, uint16_t flags) bundle->id = id; bundle->flags = flags; + bundle->state = BS_OPEN; list_init(&bundle->msg_list); return bundle; } -static void -ofp_bundle_remove(struct ofconn *ofconn, struct ofp_bundle *bundle) +void +ofp_bundle_remove__(struct ofconn *ofconn, struct ofp_bundle *bundle) { struct ofp_bundle_entry *msg; - struct hmap *bundles; LIST_FOR_EACH_POP (msg, node, &bundle->msg_list) { ofp_bundle_entry_free(msg); } - bundles = ofconn_get_bundles(ofconn); - hmap_remove(bundles, &bundle->node); - + ofconn_remove_bundle(ofconn, bundle); free(bundle); } -void -ofp_bundle_remove_all(struct ofconn *ofconn) -{ - struct ofp_bundle *b, *next; - struct hmap *bundles; - - bundles = ofconn_get_bundles(ofconn); - - HMAP_FOR_EACH_SAFE (b, next, node, bundles) { - ofp_bundle_remove(ofconn, b); - } 
-} - enum ofperr ofp_bundle_open(struct ofconn *ofconn, uint32_t id, uint16_t flags) { - struct hmap *bundles; struct ofp_bundle *bundle; + enum ofperr error; - bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, id); + bundle = ofconn_get_bundle(ofconn, id); if (bundle) { VLOG_INFO("Bundle %x already exists.", id); - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return OFPERR_OFPBFC_BAD_ID; } - /* XXX: Check the limit of open bundles */ - bundle = ofp_bundle_create(id, flags); - bundle->state = BS_OPEN; + error = ofconn_insert_bundle(ofconn, bundle); + if (error) { + free(bundle); + } - bundles = ofconn_get_bundles(ofconn); - hmap_insert(bundles, &bundle->node, bundle_hash(id)); - - return 0; + return error; } enum ofperr ofp_bundle_close(struct ofconn *ofconn, uint32_t id, uint16_t flags) { - struct hmap *bundles; struct ofp_bundle *bundle; - bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, id); + bundle = ofconn_get_bundle(ofconn, id); if (!bundle) { return OFPERR_OFPBFC_BAD_ID; } if (bundle->state == BS_CLOSED) { - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return OFPERR_OFPBFC_BUNDLE_CLOSED; } if (bundle->flags != flags) { - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return OFPERR_OFPBFC_BAD_FLAGS; } @@ -178,13 +123,11 @@ ofp_bundle_close(struct ofconn *ofconn, uint32_t id, uint16_t flags) enum ofperr ofp_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) { - struct hmap *bundles; struct ofp_bundle *bundle; enum ofperr error = 0; struct ofp_bundle_entry *msg; - bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, id); + bundle = ofconn_get_bundle(ofconn, id); if (!bundle) { return OFPERR_OFPBFC_BAD_ID; @@ -198,24 +141,22 @@ ofp_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) } } - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return error; } 
enum ofperr ofp_bundle_discard(struct ofconn *ofconn, uint32_t id) { - struct hmap *bundles; struct ofp_bundle *bundle; - bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, id); + bundle = ofconn_get_bundle(ofconn, id); if (!bundle) { return OFPERR_OFPBFC_BAD_ID; } - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return 0; } @@ -224,23 +165,24 @@ enum ofperr ofp_bundle_add_message(struct ofconn *ofconn, uint32_t id, uint16_t flags, struct ofp_bundle_entry *bmsg) { - struct hmap *bundles; struct ofp_bundle *bundle; - bundles = ofconn_get_bundles(ofconn); - bundle = ofp_bundle_find(bundles, id); + bundle = ofconn_get_bundle(ofconn, id); if (!bundle) { - bundle = ofp_bundle_create(id, flags); - bundle->state = BS_OPEN; + enum ofperr error; - bundles = ofconn_get_bundles(ofconn); - hmap_insert(bundles, &bundle->node, bundle_hash(id)); + bundle = ofp_bundle_create(id, flags); + error = ofconn_insert_bundle(ofconn, bundle); + if (error) { + free(bundle); + return error; + } } else if (bundle->state == BS_CLOSED) { - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return OFPERR_OFPBFC_BUNDLE_CLOSED; } else if (flags != bundle->flags) { - ofp_bundle_remove(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle); return OFPERR_OFPBFC_BAD_FLAGS; } diff --git a/ofproto/bundles.h b/ofproto/bundles.h index c8ce5c985..b885c9b49 100644 --- a/ofproto/bundles.h +++ b/ofproto/bundles.h @@ -22,7 +22,6 @@ #include #include "connmgr.h" -#include "list.h" #include "ofp-msgs.h" #include "ofp-util.h" @@ -40,6 +39,21 @@ struct ofp_bundle_entry { }; }; +enum bundle_state { + BS_OPEN, + BS_CLOSED +}; + +struct ofp_bundle { + struct hmap_node node; /* In struct ofconn's "bundles" hmap. 
*/ + uint32_t id; + uint16_t flags; + enum bundle_state state; + + /* List of 'struct ofp_bundle_entry's */ + struct ovs_list msg_list; +}; + static inline struct ofp_bundle_entry *ofp_bundle_entry_alloc( enum ofptype type, ovs_be32 xid); static inline void ofp_bundle_entry_free(struct ofp_bundle_entry *); @@ -50,7 +64,8 @@ enum ofperr ofp_bundle_commit(struct ofconn *, uint32_t id, uint16_t flags); enum ofperr ofp_bundle_discard(struct ofconn *, uint32_t id); enum ofperr ofp_bundle_add_message(struct ofconn *, uint32_t id, uint16_t flags, struct ofp_bundle_entry *); -void ofp_bundle_remove_all(struct ofconn *); + +void ofp_bundle_remove__(struct ofconn *ofconn, struct ofp_bundle *bundle); static inline struct ofp_bundle_entry * ofp_bundle_entry_alloc(enum ofptype type, ovs_be32 xid) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 707385f00..9851997f9 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1168,13 +1168,56 @@ ofconn_report_flow_mod(struct ofconn *ofconn, } ofconn->last_op = now; } + +/* OpenFlow 1.4 bundles.
*/ -struct hmap * -ofconn_get_bundles(struct ofconn *ofconn) +static inline uint32_t +bundle_hash(uint32_t id) { - return &ofconn->bundles; + return hash_int(id, 0); } +struct ofp_bundle * +ofconn_get_bundle(struct ofconn *ofconn, uint32_t id) +{ + struct ofp_bundle *bundle; + + HMAP_FOR_EACH_IN_BUCKET(bundle, node, bundle_hash(id), &ofconn->bundles) { + if (bundle->id == id) { + return bundle; + } + } + + return NULL; +} + +enum ofperr +ofconn_insert_bundle(struct ofconn *ofconn, struct ofp_bundle *bundle) +{ + /* XXX: Check the limit of open bundles */ + + hmap_insert(&ofconn->bundles, &bundle->node, bundle_hash(bundle->id)); + + return 0; +} + +enum ofperr +ofconn_remove_bundle(struct ofconn *ofconn, struct ofp_bundle *bundle) +{ + hmap_remove(&ofconn->bundles, &bundle->node); + + return 0; +} + +static void +bundle_remove_all(struct ofconn *ofconn) +{ + struct ofp_bundle *b, *next; + + HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { + ofp_bundle_remove__(ofconn, b); + } +} /* Private ofconn functions. */ @@ -1300,7 +1343,7 @@ ofconn_destroy(struct ofconn *ofconn) hmap_remove(&ofconn->connmgr->controllers, &ofconn->hmap_node); } - ofp_bundle_remove_all(ofconn); + bundle_remove_all(ofconn); hmap_destroy(&ofconn->bundles); hmap_destroy(&ofconn->monitors); diff --git a/ofproto/connmgr.h b/ofproto/connmgr.h index 193afa853..0e1a5b1af 100644 --- a/ofproto/connmgr.h +++ b/ofproto/connmgr.h @@ -156,7 +156,11 @@ void ofconn_send_error(const struct ofconn *, const struct ofp_header *request, enum ofperr ofconn_pktbuf_retrieve(struct ofconn *, uint32_t id, struct dp_packet **bufferp, ofp_port_t *in_port); -struct hmap *ofconn_get_bundles(struct ofconn *ofconn); +struct ofp_bundle; + +struct ofp_bundle *ofconn_get_bundle(struct ofconn *, uint32_t id); +enum ofperr ofconn_insert_bundle(struct ofconn *, struct ofp_bundle *); +enum ofperr ofconn_remove_bundle(struct ofconn *, struct ofp_bundle *); /* Logging flow_mod summaries. 
*/ void ofconn_report_flow_mod(struct ofconn *, enum ofp_flow_mod_command); From bc5e6a916a6f4a55cb85a222ae1c6685e216bdf7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 094/146] ofproto: Factor out ofproto_rule_insert__(). Pairs well with ofproto_rule_remove__(). Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 0a8c82af5..95afc242a 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -236,6 +236,8 @@ struct ofport_usage { /* rule. */ static void ofproto_rule_send_removed(struct rule *, uint8_t reason); static bool rule_is_readonly(const struct rule *); +static void ofproto_rule_insert__(struct ofproto *, struct rule *) + OVS_REQUIRES(ofproto_mutex); static void ofproto_rule_remove__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); @@ -4408,14 +4410,7 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return error; } - if (fm->hard_timeout || fm->idle_timeout) { - list_insert(&ofproto->expirable, &rule->expirable); - } - cookies_insert(ofproto, rule); - eviction_group_add_rule(rule); - if (actions->has_meter) { - meter_insert_rule(rule); - } + ofproto_rule_insert__(ofproto, rule); classifier_defer(&table->cls); @@ -7014,6 +7009,24 @@ oftable_enable_eviction(struct oftable *table, } } +/* Inserts 'rule' into the ofproto data structures BEFORE caller has inserted + * it to the classifier.
*/ +static void +ofproto_rule_insert__(struct ofproto *ofproto, struct rule *rule) + OVS_REQUIRES(ofproto_mutex) +{ + const struct rule_actions *actions = rule_get_actions(rule); + + if (rule->hard_timeout || rule->idle_timeout) { + list_insert(&ofproto->expirable, &rule->expirable); + } + cookies_insert(ofproto, rule); + eviction_group_add_rule(rule); + if (actions->has_meter) { + meter_insert_rule(rule); + } +} + /* Removes 'rule' from the ofproto data structures AFTER caller has removed * it from the classifier. */ static void From d66713776f50ee3a6f83d9b9bfc86a6477f7079d Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:38 -0700 Subject: [PATCH 095/146] ofproto: Factor out ofproto_rule_create(). Makes add_flow() easier to read and understand. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 96 +++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 95afc242a..75aef5281 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2702,6 +2702,62 @@ ofproto_rule_destroy__(struct rule *rule) rule->ofproto->ofproto_class->rule_dealloc(rule); } +/* Create a new rule based on attributes in 'fm', match in 'cr', and + * 'table_id'. Note that the rule is NOT inserted into a any data structures + * yet. Takes ownership of 'cr'. */ +static enum ofperr +ofproto_rule_create(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct cls_rule *cr, uint8_t table_id, + struct rule **rulep) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule *rule; + enum ofperr error; + + /* Allocate new rule. */ + rule = ofproto->ofproto_class->rule_alloc(); + if (!rule) { + cls_rule_destroy(cr); + VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); + return ENOMEM; + } + + /* Initialize base state. 
*/ + *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; + cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr); + ovs_refcount_init(&rule->ref_count); + rule->flow_cookie = fm->new_cookie; + rule->created = rule->modified = time_msec(); + + ovs_mutex_init(&rule->mutex); + ovs_mutex_lock(&rule->mutex); + rule->idle_timeout = fm->idle_timeout; + rule->hard_timeout = fm->hard_timeout; + rule->importance = fm->importance; + ovs_mutex_unlock(&rule->mutex); + + *CONST_CAST(uint8_t *, &rule->table_id) = table_id; + rule->flags = fm->flags & OFPUTIL_FF_STATE; + ovsrcu_set_hidden(&rule->actions, + rule_actions_create(fm->ofpacts, fm->ofpacts_len)); + list_init(&rule->meter_list_node); + rule->eviction_group = NULL; + list_init(&rule->expirable); + rule->monitor_flags = 0; + rule->add_seqno = 0; + rule->modify_seqno = 0; + + /* Construct rule, initializing derived state. */ + error = ofproto->ofproto_class->rule_construct(rule); + if (error) { + ofproto_rule_destroy__(rule); + return error; + } + + *rulep = rule; + return 0; +} + static void rule_destroy_cb(struct rule *rule) { @@ -4292,7 +4348,6 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req) OVS_REQUIRES(ofproto_mutex) { - const struct rule_actions *actions; struct oftable *table; struct cls_rule cr; struct rule *rule; @@ -4371,42 +4426,9 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } /* Allocate new rule. */ - rule = ofproto->ofproto_class->rule_alloc(); - if (!rule) { - cls_rule_destroy(&cr); - VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); - return ENOMEM; - } - - /* Initialize base state. 
*/ - *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; - cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), &cr); - ovs_refcount_init(&rule->ref_count); - rule->flow_cookie = fm->new_cookie; - rule->created = rule->modified = time_msec(); - - ovs_mutex_init(&rule->mutex); - ovs_mutex_lock(&rule->mutex); - rule->idle_timeout = fm->idle_timeout; - rule->hard_timeout = fm->hard_timeout; - rule->importance = fm->importance; - ovs_mutex_unlock(&rule->mutex); - - *CONST_CAST(uint8_t *, &rule->table_id) = table - ofproto->tables; - rule->flags = fm->flags & OFPUTIL_FF_STATE; - actions = rule_actions_create(fm->ofpacts, fm->ofpacts_len); - ovsrcu_set(&rule->actions, actions); - list_init(&rule->meter_list_node); - rule->eviction_group = NULL; - list_init(&rule->expirable); - rule->monitor_flags = 0; - rule->add_seqno = 0; - rule->modify_seqno = 0; - - /* Construct rule, initializing derived state. */ - error = ofproto->ofproto_class->rule_construct(rule); + error = ofproto_rule_create(ofproto, fm, &cr, table - ofproto->tables, + &rule); if (error) { - ofproto_rule_destroy__(rule); return error; } @@ -4429,7 +4451,7 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, cls_rule_make_visible(&rule->cr); classifier_publish(&table->cls); - learned_cookies_inc(ofproto, actions); + learned_cookies_inc(ofproto, rule_get_actions(rule)); if (minimask_get_vid_mask(&rule->cr.match.mask) == VLAN_VID_MASK) { if (ofproto->vlan_bitmap) { From d51c8b711d230811076e678293ae0650eb8f5186 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 1 Jun 2015 14:13:05 -0700 Subject: [PATCH 096/146] ofproto: Use enum ofperr for 'error'. Clean up error return types and related comments. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto-provider.h | 2 +- ofproto/ofproto.c | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 9222fe431..2e4d33380 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -1766,7 +1766,7 @@ extern const struct ofproto_class ofproto_dpif_class; int ofproto_class_register(const struct ofproto_class *); int ofproto_class_unregister(const struct ofproto_class *); -int ofproto_flow_mod(struct ofproto *, struct ofputil_flow_mod *) +enum ofperr ofproto_flow_mod(struct ofproto *, struct ofputil_flow_mod *) OVS_EXCLUDED(ofproto_mutex); void ofproto_add_flow(struct ofproto *, const struct match *, int priority, const struct ofpact *ofpacts, size_t ofpacts_len) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 75aef5281..53dbd59d2 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2050,13 +2050,12 @@ ofproto_add_flow(struct ofproto *ofproto, const struct match *match, } } -/* Executes the flow modification specified in 'fm'. Returns 0 on success, an - * OFPERR_* OpenFlow error code on failure, or OFPROTO_POSTPONE if the - * operation cannot be initiated now but may be retried later. +/* Executes the flow modification specified in 'fm'. Returns 0 on success, or + * an OFPERR_* OpenFlow error code on failure. * * This is a helper function for in-band control and fail-open and the "learn" * action. */ -int +enum ofperr ofproto_flow_mod(struct ofproto *ofproto, struct ofputil_flow_mod *fm) OVS_EXCLUDED(ofproto_mutex) { @@ -2719,7 +2718,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct ofputil_flow_mod *fm, if (!rule) { cls_rule_destroy(cr); VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); - return ENOMEM; + return OFPERR_OFPFMFC_UNKNOWN; } /* Initialize base state. 
*/ @@ -3866,7 +3865,7 @@ collect_rules_strict(struct ofproto *ofproto, { struct oftable *table; size_t n_readonly = 0; - int error = 0; + enum ofperr error = 0; rule_collection_init(rules); @@ -4335,9 +4334,8 @@ set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, * in which no matching flow already exists in the flow table. * * Adds the flow specified by 'ofm', which is followed by 'n_actions' - * ofp_actions, to the ofproto's flow table. Returns 0 on success, an OpenFlow - * error code on failure, or OFPROTO_POSTPONE if the operation cannot be - * initiated now but may be retried later. + * ofp_actions, to the ofproto's flow table. Returns 0 on success, or an + * OpenFlow error code on failure. * * The caller retains ownership of 'fm->ofpacts'. * @@ -4352,7 +4350,7 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct cls_rule cr; struct rule *rule; uint8_t table_id; - int error = 0; + enum ofperr error = 0; if (!check_table_id(ofproto, fm->table_id)) { error = OFPERR_OFPBRC_BAD_TABLE_ID; @@ -4641,7 +4639,7 @@ modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { struct rule_criteria criteria; struct rule_collection rules; - int error; + enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); @@ -4670,7 +4668,7 @@ modify_flow_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { struct rule_criteria criteria; struct rule_collection rules; - int error; + enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); @@ -6370,7 +6368,6 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) return error; } - static enum ofperr handle_bundle_add(struct ofconn *ofconn, const struct ofp_header *oh) { @@ -6610,7 +6607,8 @@ static void handle_openflow(struct ofconn *ofconn, const struct ofpbuf *ofp_msg) OVS_EXCLUDED(ofproto_mutex) { 
- int error = handle_openflow__(ofconn, ofp_msg); + enum ofperr error = handle_openflow__(ofconn, ofp_msg); + if (error) { ofconn_send_error(ofconn, ofp_msg->data, error); } From dd27be824c36938b773c42348547eae48168ea66 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:39 -0700 Subject: [PATCH 097/146] ofproto: Refactor modify_flows__(). Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 349 ++++++++++++++++++++++++++-------------------- 1 file changed, 200 insertions(+), 149 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 53dbd59d2..3eca3b04a 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -256,9 +256,14 @@ struct flow_mod_requester { static enum ofperr add_flow(struct ofproto *, struct ofputil_flow_mod *, const struct flow_mod_requester *); -static enum ofperr modify_flows__(struct ofproto *, struct ofputil_flow_mod *, - const struct rule_collection *, - const struct flow_mod_requester *); +static enum ofperr modify_flow_check__(struct ofproto *, + struct ofputil_flow_mod *, + const struct rule *) + OVS_REQUIRES(ofproto_mutex); +static void modify_flow__(struct ofproto *, struct ofputil_flow_mod *, + struct rule *, const struct flow_mod_requester *, + struct ovs_list *dead_cookies) + OVS_REQUIRES(ofproto_mutex); static void delete_flows__(const struct rule_collection *, enum ofp_flow_removed_reason, const struct flow_mod_requester *) @@ -4395,16 +4400,18 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, /* Transform "add" into "modify" if there's an existing identical flow. 
*/ rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); if (rule) { - struct rule_collection rules; - cls_rule_destroy(&cr); - rule_collection_init(&rules); - rule_collection_add(&rules, rule); fm->modify_cookie = true; - error = modify_flows__(ofproto, fm, &rules, req); - rule_collection_destroy(&rules); + error = modify_flow_check__(ofproto, fm, rule); + if (!error) { + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); + modify_flow__(ofproto, fm, rule, req, &dead_cookies); + learned_cookies_flush(ofproto, &dead_cookies); + + goto send_packet; + } return error; } @@ -4465,160 +4472,186 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, ofmonitor_report(ofproto->connmgr, rule, NXFME_ADDED, 0, req ? req->ofconn : NULL, req ? req->xid : 0, NULL); - +send_packet: return req ? send_buffered_packet(req->ofconn, fm->buffer_id, rule) : 0; } /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ -/* Modifies the rules listed in 'rules', changing their actions to match those - * in 'fm'. - * - * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, - * if any. +/* Checks if the 'rule' can be modified to match 'fm'. * * Returns 0 on success, otherwise an OpenFlow error code. */ static enum ofperr -modify_flows__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct rule_collection *rules, - const struct flow_mod_requester *req) +modify_flow_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct rule *rule) OVS_REQUIRES(ofproto_mutex) { - struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - enum nx_flow_update_event event; + if (ofproto->ofproto_class->rule_premodify_actions) { + return ofproto->ofproto_class->rule_premodify_actions( + rule, fm->ofpacts, fm->ofpacts_len); + } + return 0; +} + +/* Checks if the rules listed in 'rules' can have their actions changed to + * match those in 'fm'. + * + * Returns 0 on success, otherwise an OpenFlow error code. 
*/ +static enum ofperr +modify_flows_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + enum ofperr error; size_t i; if (ofproto->ofproto_class->rule_premodify_actions) { for (i = 0; i < rules->n; i++) { - struct rule *rule = rules->rules[i]; - enum ofperr error; - - error = ofproto->ofproto_class->rule_premodify_actions( - rule, fm->ofpacts, fm->ofpacts_len); + error = modify_flow_check__(ofproto, fm, rules->rules[i]); if (error) { return error; } } } - event = fm->command == OFPFC_ADD ? NXFME_ADDED : NXFME_MODIFIED; - for (i = 0; i < rules->n; i++) { - struct rule *rule = rules->rules[i]; - - /* 'fm' says that */ - bool change_cookie = (fm->modify_cookie - && fm->new_cookie != OVS_BE64_MAX - && fm->new_cookie != rule->flow_cookie); - - const struct rule_actions *actions = rule_get_actions(rule); - bool change_actions = !ofpacts_equal(fm->ofpacts, fm->ofpacts_len, - actions->ofpacts, - actions->ofpacts_len); - - bool reset_counters = (fm->flags & OFPUTIL_FF_RESET_COUNTS) != 0; - - long long int now = time_msec(); - - if (change_cookie) { - cookies_remove(ofproto, rule); - } - - ovs_mutex_lock(&rule->mutex); - if (fm->command == OFPFC_ADD) { - rule->idle_timeout = fm->idle_timeout; - rule->hard_timeout = fm->hard_timeout; - rule->importance = fm->importance; - rule->flags = fm->flags & OFPUTIL_FF_STATE; - rule->created = now; - } - if (change_cookie) { - rule->flow_cookie = fm->new_cookie; - } - rule->modified = now; - ovs_mutex_unlock(&rule->mutex); - - if (change_cookie) { - cookies_insert(ofproto, rule); - } - if (fm->command == OFPFC_ADD) { - if (fm->idle_timeout || fm->hard_timeout || fm->importance) { - if (!rule->eviction_group) { - eviction_group_add_rule(rule); - } - } else { - eviction_group_remove_rule(rule); - } - } - - if (change_actions) { - /* We have to change the actions. 
The rule's conjunctive match set - * is a function of its actions, so we need to update that too. The - * conjunctive match set is used in the lookup process to figure - * which (if any) collection of conjunctive sets the packet matches - * with. However, a rule with conjunction actions is never to be - * returned as a classifier lookup result. To make sure a rule with - * conjunction actions is not returned as a lookup result, we update - * them in a carefully chosen order: - * - * - If we're adding a conjunctive match set where there wasn't one - * before, we have to make the conjunctive match set available to - * lookups before the rule's actions are changed, as otherwise - * rule with a conjunction action could be returned as a lookup - * result. - * - * - To clear some nonempty conjunctive set, we set the rule's - * actions first, so that a lookup can't return a rule with - * conjunction actions. - * - * - Otherwise, order doesn't matter for changing one nonempty - * conjunctive match set to some other nonempty set, since the - * rule's actions are not seen by the classifier, and hence don't - * matter either before or after the change. */ - struct cls_conjunction *conjs; - size_t n_conjs; - get_conjunctions(fm, &conjs, &n_conjs); - - if (n_conjs) { - set_conjunctions(rule, conjs, n_conjs); - } - ovsrcu_set(&rule->actions, rule_actions_create(fm->ofpacts, - fm->ofpacts_len)); - if (!conjs) { - set_conjunctions(rule, conjs, n_conjs); - } - - free(conjs); - } - - if (change_actions || reset_counters) { - ofproto->ofproto_class->rule_modify_actions(rule, reset_counters); - } - - if (event != NXFME_MODIFIED || change_actions || change_cookie) { - ofmonitor_report(ofproto->connmgr, rule, event, 0, - req ? req->ofconn : NULL, req ? req->xid : 0, - change_actions ? 
actions : NULL); - } - - if (change_actions) { - learned_cookies_inc(ofproto, rule_get_actions(rule)); - learned_cookies_dec(ofproto, actions, &dead_cookies); - rule_actions_destroy(actions); - } - } - learned_cookies_flush(ofproto, &dead_cookies); - - if (fm->buffer_id != UINT32_MAX && req) { - return send_buffered_packet(req->ofconn, fm->buffer_id, - rules->rules[0]); - } - return 0; } +/* Modifies the 'rule', changing them to match 'fm'. */ +static void +modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule *rule, const struct flow_mod_requester *req, + struct ovs_list *dead_cookies) + OVS_REQUIRES(ofproto_mutex) +{ + enum nx_flow_update_event event = fm->command == OFPFC_ADD + ? NXFME_ADDED : NXFME_MODIFIED; + + /* 'fm' says that */ + bool change_cookie = (fm->modify_cookie + && fm->new_cookie != OVS_BE64_MAX + && fm->new_cookie != rule->flow_cookie); + + const struct rule_actions *actions = rule_get_actions(rule); + bool change_actions = !ofpacts_equal(fm->ofpacts, fm->ofpacts_len, + actions->ofpacts, + actions->ofpacts_len); + + bool reset_counters = (fm->flags & OFPUTIL_FF_RESET_COUNTS) != 0; + + long long int now = time_msec(); + + if (change_cookie) { + cookies_remove(ofproto, rule); + } + + ovs_mutex_lock(&rule->mutex); + if (fm->command == OFPFC_ADD) { + rule->idle_timeout = fm->idle_timeout; + rule->hard_timeout = fm->hard_timeout; + rule->importance = fm->importance; + rule->flags = fm->flags & OFPUTIL_FF_STATE; + rule->created = now; + } + if (change_cookie) { + rule->flow_cookie = fm->new_cookie; + } + rule->modified = now; + ovs_mutex_unlock(&rule->mutex); + + if (change_cookie) { + cookies_insert(ofproto, rule); + } + if (fm->command == OFPFC_ADD) { + if (fm->idle_timeout || fm->hard_timeout || fm->importance) { + if (!rule->eviction_group) { + eviction_group_add_rule(rule); + } + } else { + eviction_group_remove_rule(rule); + } + } + + if (change_actions) { + /* We have to change the actions. 
The rule's conjunctive match set + * is a function of its actions, so we need to update that too. The + * conjunctive match set is used in the lookup process to figure + * which (if any) collection of conjunctive sets the packet matches + * with. However, a rule with conjunction actions is never to be + * returned as a classifier lookup result. To make sure a rule with + * conjunction actions is not returned as a lookup result, we update + * them in a carefully chosen order: + * + * - If we're adding a conjunctive match set where there wasn't one + * before, we have to make the conjunctive match set available to + * lookups before the rule's actions are changed, as otherwise + * rule with a conjunction action could be returned as a lookup + * result. + * + * - To clear some nonempty conjunctive set, we set the rule's + * actions first, so that a lookup can't return a rule with + * conjunction actions. + * + * - Otherwise, order doesn't matter for changing one nonempty + * conjunctive match set to some other nonempty set, since the + * rule's actions are not seen by the classifier, and hence don't + * matter either before or after the change. */ + struct cls_conjunction *conjs; + size_t n_conjs; + get_conjunctions(fm, &conjs, &n_conjs); + + if (n_conjs) { + set_conjunctions(rule, conjs, n_conjs); + } + ovsrcu_set(&rule->actions, rule_actions_create(fm->ofpacts, + fm->ofpacts_len)); + if (!conjs) { + set_conjunctions(rule, conjs, n_conjs); + } + + free(conjs); + } + + if (change_actions || reset_counters) { + ofproto->ofproto_class->rule_modify_actions(rule, reset_counters); + } + + if (event != NXFME_MODIFIED || change_actions || change_cookie) { + ofmonitor_report(ofproto->connmgr, rule, event, 0, + req ? req->ofconn : NULL, req ? req->xid : 0, + change_actions ? 
actions : NULL); + } + + if (change_actions) { + learned_cookies_inc(ofproto, rule_get_actions(rule)); + learned_cookies_dec(ofproto, actions, dead_cookies); + rule_actions_destroy(actions); + } +} + +/* Modifies the rules listed in 'rules', changing their actions to match those + * in 'fm'. + * + * 'req' is used to retrieve the packet buffer specified in fm->buffer_id, + * if any. */ +static void +modify_flows__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct rule_collection *rules, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); + size_t i; + + for (i = 0; i < rules->n; i++) { + modify_flow__(ofproto, fm, rules->rules[i], req, &dead_cookies); + } + learned_cookies_flush(ofproto, &dead_cookies); +} + static enum ofperr -modify_flows_add(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +modify_flows_add__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) OVS_REQUIRES(ofproto_mutex) { if (fm->cookie_mask != htonll(0) || fm->new_cookie == OVS_BE64_MAX) { @@ -4627,6 +4660,31 @@ modify_flows_add(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return add_flow(ofproto, fm, req); } +static enum ofperr +modify_flows(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct rule_collection *rules, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + enum ofperr error; + + if (rules->n > 0) { + error = modify_flows_check__(ofproto, fm, rules); + if (!error) { + modify_flows__(ofproto, fm, rules, req); + + if (fm->buffer_id != UINT32_MAX && req) { + error = send_buffered_packet(req->ofconn, fm->buffer_id, + rules->rules[0]); + } + } + } else { + error = modify_flows_add__(ofproto, fm, req); + } + + return error; +} + /* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code on * failure. 
* @@ -4649,11 +4707,8 @@ modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule_criteria_destroy(&criteria); if (!error) { - error = (rules.n > 0 - ? modify_flows__(ofproto, fm, &rules, req) - : modify_flows_add(ofproto, fm, req)); + error = modify_flows(ofproto, fm, &rules, req); } - rule_collection_destroy(&rules); return error; @@ -4678,13 +4733,9 @@ modify_flow_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule_criteria_destroy(&criteria); if (!error) { - if (rules.n == 0) { - error = modify_flows_add(ofproto, fm, req); - } else if (rules.n == 1) { - error = modify_flows__(ofproto, fm, &rules, req); - } + /* collect_rules_strict() can return max 1 rule. */ + error = modify_flows(ofproto, fm, &rules, req); } - rule_collection_destroy(&rules); return error; From c84d8691141fa698100480043ace76c553fff280 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:39 -0700 Subject: [PATCH 098/146] ofproto: Split add_flow(). Split add_flow() into add_flow_begin() which does all the error checking, and add_flow_finish() which cannot fail. Since we still want to send an error response for an unknown 'buffer_id', send_buffered_packet() now sends the error response itself. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 215 ++++++++++++++++++++++++++-------------------- 1 file changed, 124 insertions(+), 91 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 3eca3b04a..0eda3f086 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -249,7 +249,7 @@ static void ofproto_rule_remove__(struct ofproto *, struct rule *) * meaningful and thus supplied as NULL. */ struct flow_mod_requester { struct ofconn *ofconn; /* Connection on which flow_mod arrived. */ - ovs_be32 xid; /* OpenFlow xid of flow_mod request. */ + const struct ofp_header *request; }; /* OpenFlow. 
*/ @@ -269,8 +269,8 @@ static void delete_flows__(const struct rule_collection *, const struct flow_mod_requester *) OVS_REQUIRES(ofproto_mutex); -static enum ofperr send_buffered_packet(struct ofconn *, uint32_t buffer_id, - struct rule *) +static void send_buffered_packet(const struct flow_mod_requester *, + uint32_t buffer_id, struct rule *) OVS_REQUIRES(ofproto_mutex); static bool ofproto_group_exists__(const struct ofproto *ofproto, @@ -4335,20 +4335,9 @@ set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, cls_rule_set_conjunctions(cr, conjs, n_conjs); } -/* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT - * in which no matching flow already exists in the flow table. - * - * Adds the flow specified by 'ofm', which is followed by 'n_actions' - * ofp_actions, to the ofproto's flow table. Returns 0 on success, or an - * OpenFlow error code on failure. - * - * The caller retains ownership of 'fm->ofpacts'. - * - * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, - * if any. 
*/ static enum ofperr -add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule **rulep, bool *modify) OVS_REQUIRES(ofproto_mutex) { struct oftable *table; @@ -4387,93 +4376,139 @@ add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return OFPERR_OFPBRC_EPERM; } - if (!(fm->flags & OFPUTIL_FF_HIDDEN_FIELDS)) { - if (!match_has_default_hidden_fields(&fm->match)) { - VLOG_WARN_RL(&rl, "%s: (add_flow) only internal flows can set " - "non-default values to hidden fields", ofproto->name); - return OFPERR_OFPBRC_EPERM; - } + if (!(fm->flags & OFPUTIL_FF_HIDDEN_FIELDS) + && !match_has_default_hidden_fields(&fm->match)) { + VLOG_WARN_RL(&rl, "%s: (add_flow) only internal flows can set " + "non-default values to hidden fields", ofproto->name); + return OFPERR_OFPBRC_EPERM; } cls_rule_init(&cr, &fm->match, fm->priority); - /* Transform "add" into "modify" if there's an existing identical flow. */ + /* Check for the existence of an identical rule. */ rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); if (rule) { + /* Transform "add" into "modify" of an existing identical flow. */ cls_rule_destroy(&cr); fm->modify_cookie = true; error = modify_flow_check__(ofproto, fm, rule); - if (!error) { - struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - - modify_flow__(ofproto, fm, rule, req, &dead_cookies); - learned_cookies_flush(ofproto, &dead_cookies); - - goto send_packet; + if (error) { + return error; } - return error; - } - /* Check for overlap, if requested. */ - if (fm->flags & OFPUTIL_FF_CHECK_OVERLAP) { - if (classifier_rule_overlaps(&table->cls, &cr)) { + *modify = true; + } else { /* New rule. */ + struct cls_conjunction *conjs; + size_t n_conjs; + + /* Check for overlap, if requested. 
*/ + if (fm->flags & OFPUTIL_FF_CHECK_OVERLAP + && classifier_rule_overlaps(&table->cls, &cr)) { cls_rule_destroy(&cr); return OFPERR_OFPFMFC_OVERLAP; } + + /* If necessary, evict an existing rule to clear out space. */ + error = evict_rules_from_table(table, 1); + if (error) { + cls_rule_destroy(&cr); + return error; + } + + /* Allocate new rule. */ + error = ofproto_rule_create(ofproto, fm, &cr, table - ofproto->tables, + &rule); + if (error) { + return error; + } + + /* Insert flow to the classifier, so that later flow_mods may relate + * to it. This is reversible, in case later errors require this to + * be reverted. */ + ofproto_rule_insert__(ofproto, rule); + /* Make the new rule invisible for classifier lookups. */ + classifier_defer(&table->cls); + get_conjunctions(fm, &conjs, &n_conjs); + classifier_insert(&table->cls, &rule->cr, conjs, n_conjs); + free(conjs); + + error = ofproto->ofproto_class->rule_insert(rule); + if (error) { + oftable_remove_rule(rule); + ofproto_rule_unref(rule); + return error; + } + + *modify = false; } - /* If necessary, evict an existing rule to clear out space. */ - error = evict_rules_from_table(table, 1); - if (error) { - cls_rule_destroy(&cr); - return error; - } + *rulep = rule; + return 0; +} - /* Allocate new rule. 
*/ - error = ofproto_rule_create(ofproto, fm, &cr, table - ofproto->tables, - &rule); - if (error) { - return error; - } +static void +add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req, + struct rule *rule, bool modify) + OVS_REQUIRES(ofproto_mutex) +{ + if (modify) { + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - ofproto_rule_insert__(ofproto, rule); + modify_flow__(ofproto, fm, rule, req, &dead_cookies); + learned_cookies_flush(ofproto, &dead_cookies); + } else { + struct oftable *table = &ofproto->tables[rule->table_id]; - classifier_defer(&table->cls); + cls_rule_make_visible(&rule->cr); + classifier_publish(&table->cls); - struct cls_conjunction *conjs; - size_t n_conjs; - get_conjunctions(fm, &conjs, &n_conjs); - classifier_insert(&table->cls, &rule->cr, conjs, n_conjs); - free(conjs); + learned_cookies_inc(ofproto, rule_get_actions(rule)); - error = ofproto->ofproto_class->rule_insert(rule); - if (error) { - oftable_remove_rule(rule); - ofproto_rule_unref(rule); - return error; - } - cls_rule_make_visible(&rule->cr); - classifier_publish(&table->cls); + if (minimask_get_vid_mask(&rule->cr.match.mask) == VLAN_VID_MASK) { + if (ofproto->vlan_bitmap) { + uint16_t vid = miniflow_get_vid(&rule->cr.match.flow); - learned_cookies_inc(ofproto, rule_get_actions(rule)); - - if (minimask_get_vid_mask(&rule->cr.match.mask) == VLAN_VID_MASK) { - if (ofproto->vlan_bitmap) { - uint16_t vid = miniflow_get_vid(&rule->cr.match.flow); - if (!bitmap_is_set(ofproto->vlan_bitmap, vid)) { - bitmap_set1(ofproto->vlan_bitmap, vid); + if (!bitmap_is_set(ofproto->vlan_bitmap, vid)) { + bitmap_set1(ofproto->vlan_bitmap, vid); + ofproto->vlans_changed = true; + } + } else { ofproto->vlans_changed = true; } - } else { - ofproto->vlans_changed = true; } + + ofmonitor_report(ofproto->connmgr, rule, NXFME_ADDED, 0, + req ? req->ofconn : NULL, + req ? 
req->request->xid : 0, NULL); } - ofmonitor_report(ofproto->connmgr, rule, NXFME_ADDED, 0, - req ? req->ofconn : NULL, req ? req->xid : 0, NULL); -send_packet: - return req ? send_buffered_packet(req->ofconn, fm->buffer_id, rule) : 0; + send_buffered_packet(req, fm->buffer_id, rule); +} + +/* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT + * in which no matching flow already exists in the flow table. + * + * Adds the flow specified by 'fm', to the ofproto's flow table. Returns 0 on + * success, or an OpenFlow error code on failure. + * + * The caller retains ownership of 'fm->ofpacts'. */ +static enum ofperr +add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule *rule; + bool modify; + enum ofperr error; + + error = add_flow_begin(ofproto, fm, &rule, &modify); + if (!error) { + add_flow_finish(ofproto, fm, req, rule, modify); + } + + return error; } /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ @@ -4618,7 +4653,7 @@ modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, if (event != NXFME_MODIFIED || change_actions || change_cookie) { ofmonitor_report(ofproto->connmgr, rule, event, 0, - req ? req->ofconn : NULL, req ? req->xid : 0, + req ? req->ofconn : NULL, req ? req->request->xid : 0, change_actions ? 
actions : NULL); } @@ -4672,11 +4707,7 @@ modify_flows(struct ofproto *ofproto, struct ofputil_flow_mod *fm, error = modify_flows_check__(ofproto, fm, rules); if (!error) { modify_flows__(ofproto, fm, rules, req); - - if (fm->buffer_id != UINT32_MAX && req) { - error = send_buffered_packet(req->ofconn, fm->buffer_id, - rules->rules[0]); - } + send_buffered_packet(req, fm->buffer_id, rules->rules[0]); } } else { error = modify_flows_add__(ofproto, fm, req); @@ -4765,8 +4796,8 @@ delete_flows__(const struct rule_collection *rules, ofproto_rule_send_removed(rule, reason); ofmonitor_report(ofproto->connmgr, rule, NXFME_DELETED, reason, - req ? req->ofconn : NULL, req ? req->xid : 0, - NULL); + req ? req->ofconn : NULL, + req ? req->request->xid : 0, NULL); if (next_table == rule->table_id) { classifier_defer(cls); @@ -4952,7 +4983,7 @@ handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct flow_mod_requester req; req.ofconn = ofconn; - req.xid = oh->xid; + req.request = oh; error = handle_flow_mod__(ofproto, &fm, &req); } if (error) { @@ -6668,18 +6699,19 @@ handle_openflow(struct ofconn *ofconn, const struct ofpbuf *ofp_msg) /* Asynchronous operations. 
*/ -static enum ofperr -send_buffered_packet(struct ofconn *ofconn, uint32_t buffer_id, +static void +send_buffered_packet(const struct flow_mod_requester *req, uint32_t buffer_id, struct rule *rule) OVS_REQUIRES(ofproto_mutex) { - enum ofperr error = 0; - if (ofconn && buffer_id != UINT32_MAX) { - struct ofproto *ofproto = ofconn_get_ofproto(ofconn); + if (req && req->ofconn && buffer_id != UINT32_MAX) { + struct ofproto *ofproto = ofconn_get_ofproto(req->ofconn); struct dp_packet *packet; ofp_port_t in_port; + enum ofperr error; - error = ofconn_pktbuf_retrieve(ofconn, buffer_id, &packet, &in_port); + error = ofconn_pktbuf_retrieve(req->ofconn, buffer_id, &packet, + &in_port); if (packet) { struct rule_execute *re; @@ -6696,9 +6728,10 @@ send_buffered_packet(struct ofconn *ofconn, uint32_t buffer_id, dp_packet_delete(re->packet); free(re); } + } else { + ofconn_send_error(req->ofconn, req->request, error); } } - return error; } static uint64_t From d99f64d7e01611ca9f7fe94999d029a9399a1275 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:39 -0700 Subject: [PATCH 099/146] ofproto: Split modify_flows(). 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 114 +++++++++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 36 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 0eda3f086..d1cae0201 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -261,7 +261,7 @@ static enum ofperr modify_flow_check__(struct ofproto *, const struct rule *) OVS_REQUIRES(ofproto_mutex); static void modify_flow__(struct ofproto *, struct ofputil_flow_mod *, - struct rule *, const struct flow_mod_requester *, + const struct flow_mod_requester *, struct rule *, struct ovs_list *dead_cookies) OVS_REQUIRES(ofproto_mutex); static void delete_flows__(const struct rule_collection *, @@ -4456,7 +4456,7 @@ add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, if (modify) { struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - modify_flow__(ofproto, fm, rule, req, &dead_cookies); + modify_flow__(ofproto, fm, req, rule, &dead_cookies); learned_cookies_flush(ofproto, &dead_cookies); } else { struct oftable *table = &ofproto->tables[rule->table_id]; @@ -4555,7 +4555,7 @@ modify_flows_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, /* Modifies the 'rule', changing them to match 'fm'. */ static void modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct rule *rule, const struct flow_mod_requester *req, + const struct flow_mod_requester *req, struct rule *rule, struct ovs_list *dead_cookies) OVS_REQUIRES(ofproto_mutex) { @@ -4671,48 +4671,40 @@ modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, * if any. 
*/ static void modify_flows__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct rule_collection *rules, - const struct flow_mod_requester *req) + const struct flow_mod_requester *req, + const struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); size_t i; for (i = 0; i < rules->n; i++) { - modify_flow__(ofproto, fm, rules->rules[i], req, &dead_cookies); + modify_flow__(ofproto, fm, req, rules->rules[i], &dead_cookies); } learned_cookies_flush(ofproto, &dead_cookies); } static enum ofperr -modify_flows_add__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - if (fm->cookie_mask != htonll(0) || fm->new_cookie == OVS_BE64_MAX) { - return 0; - } - return add_flow(ofproto, fm, req); -} - -static enum ofperr -modify_flows(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct rule_collection *rules, - const struct flow_mod_requester *req) +modify_flows_begin__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { enum ofperr error; if (rules->n > 0) { error = modify_flows_check__(ofproto, fm, rules); + } else if (!(fm->cookie_mask != htonll(0) + || fm->new_cookie == OVS_BE64_MAX)) { + bool modify; + + error = add_flow_begin(ofproto, fm, &rules->rules[0], &modify); if (!error) { - modify_flows__(ofproto, fm, rules, req); - send_buffered_packet(req, fm->buffer_id, rules->rules[0]); + ovs_assert(!modify); } } else { - error = modify_flows_add__(ofproto, fm, req); + rules->rules[0] = NULL; + error = 0; } - return error; } @@ -4722,25 +4714,57 @@ modify_flows(struct ofproto *ofproto, struct ofputil_flow_mod *fm, * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, * if any. 
*/ static enum ofperr -modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +modify_flows_begin_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; - struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_loose(ofproto, &criteria, &rules); + error = collect_rules_loose(ofproto, &criteria, rules); rule_criteria_destroy(&criteria); if (!error) { - error = modify_flows(ofproto, fm, &rules, req); + error = modify_flows_begin__(ofproto, fm, rules); + } + + if (error) { + rule_collection_destroy(rules); + } + return error; +} + +static void +modify_flows_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req, + struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + if (rules->n > 0) { + modify_flows__(ofproto, fm, req, rules); + send_buffered_packet(req, fm->buffer_id, rules->rules[0]); + } else if (rules->rules[0] != NULL) { + add_flow_finish(ofproto, fm, req, rules->rules[0], false); + } + rule_collection_destroy(rules); +} + +static enum ofperr +modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule_collection rules; + enum ofperr error; + + error = modify_flows_begin_loose(ofproto, fm, &rules); + if (!error) { + modify_flows_finish(ofproto, fm, req, &rules); } - rule_collection_destroy(&rules); return error; } @@ -4748,29 +4772,47 @@ modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error * code on failure. 
*/ static enum ofperr -modify_flow_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +modify_flow_begin_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; - struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_strict(ofproto, &criteria, &rules); + error = collect_rules_strict(ofproto, &criteria, rules); rule_criteria_destroy(&criteria); if (!error) { /* collect_rules_strict() can return max 1 rule. */ - error = modify_flows(ofproto, fm, &rules, req); + error = modify_flows_begin__(ofproto, fm, rules); + } + + if (error) { + rule_collection_destroy(rules); + } + return error; +} + +static enum ofperr +modify_flow_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule_collection rules; + enum ofperr error; + + error = modify_flow_begin_strict(ofproto, fm, &rules); + if (!error) { + modify_flows_finish(ofproto, fm, req, &rules); } - rule_collection_destroy(&rules); return error; } + /* OFPFC_DELETE implementation. */ From ce59413fb48428f52c02e167cef6d6cf4d5984d1 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 29 May 2015 11:28:39 -0700 Subject: [PATCH 100/146] ofproto: Split delete_flow*(). 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 79 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index d1cae0201..f98656648 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -3778,6 +3778,8 @@ rule_collection_destroy(struct rule_collection *rules) * function verifies most of the criteria in 'c' itself, but the caller must * check 'c->cr' itself. * + * Rules that have already been marked as 'to_be_removed' are not collected. + * * Increments '*n_readonly' if 'rule' wasn't added because it's read-only (and * 'c' only includes modifiable rules). */ static void @@ -3789,7 +3791,8 @@ collect_rule(struct rule *rule, const struct rule_criteria *c, && ofproto_rule_has_out_port(rule, c->out_port) && ofproto_rule_has_out_group(rule, c->out_group) && !((rule->flow_cookie ^ c->cookie) & c->cookie_mask) - && (!rule_is_hidden(rule) || c->include_hidden)) { + && (!rule_is_hidden(rule) || c->include_hidden) + && !rule->cr.to_be_removed) { /* Rule matches all the criteria... */ if (!rule_is_readonly(rule) || c->include_readonly) { /* ...add it. */ @@ -4864,13 +4867,12 @@ delete_flows__(const struct rule_collection *rules, /* Implements OFPFC_DELETE. 
*/ static enum ofperr -delete_flows_loose(struct ofproto *ofproto, - const struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +delete_flows_begin_loose(struct ofproto *ofproto, + const struct ofputil_flow_mod *fm, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; - struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, @@ -4878,25 +4880,55 @@ delete_flows_loose(struct ofproto *ofproto, fm->out_port, fm->out_group); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_loose(ofproto, &criteria, &rules); + error = collect_rules_loose(ofproto, &criteria, rules); rule_criteria_destroy(&criteria); if (!error) { - delete_flows__(&rules, fm->delete_reason, req); + for (size_t i = 0; i < rules->n; i++) { + struct rule *rule = rules->rules[i]; + + CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = true; + } + } + + return error; +} + +static void +delete_flows_finish(const struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req, + struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + delete_flows__(rules, fm->delete_reason, req); + rule_collection_destroy(rules); +} + +static enum ofperr +delete_flows_loose(struct ofproto *ofproto, + const struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule_collection rules; + enum ofperr error; + + error = delete_flows_begin_loose(ofproto, fm, &rules); + if (!error) { + delete_flows_finish(fm, req, &rules); } - rule_collection_destroy(&rules); return error; } /* Implements OFPFC_DELETE_STRICT. 
*/ static enum ofperr -delete_flow_strict(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) +delete_flow_begin_strict(struct ofproto *ofproto, + const struct ofputil_flow_mod *fm, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; - struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, @@ -4904,13 +4936,32 @@ delete_flow_strict(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, fm->out_port, fm->out_group); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_strict(ofproto, &criteria, &rules); + error = collect_rules_strict(ofproto, &criteria, rules); rule_criteria_destroy(&criteria); if (!error) { - delete_flows__(&rules, fm->delete_reason, req); + for (size_t i = 0; i < rules->n; i++) { + struct rule *rule = rules->rules[i]; + + CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = true; + } + } + + return error; +} + +static enum ofperr +delete_flow_strict(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule_collection rules; + enum ofperr error; + + error = delete_flow_begin_strict(ofproto, fm, &rules); + if (!error) { + delete_flows_finish(fm, req, &rules); } - rule_collection_destroy(&rules); return error; } From 1f42be1c6696df46b46dade5d73470f395426a9c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 1 Jun 2015 15:47:58 -0700 Subject: [PATCH 101/146] ofproto: Add support for reverting flow mods and bundle commit. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/ofp-util.c | 8 + ofproto/bundles.c | 44 ++---- ofproto/bundles.h | 25 +++- ofproto/connmgr.c | 2 +- ofproto/ofproto.c | 368 ++++++++++++++++++++++++++++++---------------- tests/ofproto.at | 66 ++++----- 6 files changed, 313 insertions(+), 200 deletions(-) diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 17a0c412a..0f9a38d85 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -1795,11 +1795,19 @@ ofputil_decode_flow_mod(struct ofputil_flow_mod *fm, fm->command = command & 0xff; fm->table_id = command >> 8; } else { + if (command > 0xff) { + VLOG_WARN_RL(&bad_ofmsg_rl, "flow_mod has explicit table_id " + "but flow_mod_table_id extension is not enabled"); + } fm->command = command; fm->table_id = 0xff; } } + if (fm->command > OFPFC_DELETE_STRICT) { + return OFPERR_OFPFMFC_BAD_COMMAND; + } + error = ofpacts_pull_openflow_instructions(&b, b.size, oh->version, ofpacts); if (error) { diff --git a/ofproto/bundles.c b/ofproto/bundles.c index ebf8f7ff7..686d61f49 100644 --- a/ofproto/bundles.c +++ b/ofproto/bundles.c @@ -59,11 +59,16 @@ ofp_bundle_create(uint32_t id, uint16_t flags) } void -ofp_bundle_remove__(struct ofconn *ofconn, struct ofp_bundle *bundle) +ofp_bundle_remove__(struct ofconn *ofconn, struct ofp_bundle *bundle, + bool success) { struct ofp_bundle_entry *msg; LIST_FOR_EACH_POP (msg, node, &bundle->msg_list) { + if (success && msg->type == OFPTYPE_FLOW_MOD) { + /* Tell connmgr about successful flow mods. 
*/ + ofconn_report_flow_mod(ofconn, msg->fm.command); + } ofp_bundle_entry_free(msg); } @@ -81,7 +86,7 @@ ofp_bundle_open(struct ofconn *ofconn, uint32_t id, uint16_t flags) if (bundle) { VLOG_INFO("Bundle %x already exists.", id); - ofp_bundle_remove__(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle, false); return OFPERR_OFPBFC_BAD_ID; } @@ -107,12 +112,12 @@ ofp_bundle_close(struct ofconn *ofconn, uint32_t id, uint16_t flags) } if (bundle->state == BS_CLOSED) { - ofp_bundle_remove__(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle, false); return OFPERR_OFPBFC_BUNDLE_CLOSED; } if (bundle->flags != flags) { - ofp_bundle_remove__(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle, false); return OFPERR_OFPBFC_BAD_FLAGS; } @@ -120,31 +125,6 @@ ofp_bundle_close(struct ofconn *ofconn, uint32_t id, uint16_t flags) return 0; } -enum ofperr -ofp_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) -{ - struct ofp_bundle *bundle; - enum ofperr error = 0; - struct ofp_bundle_entry *msg; - - bundle = ofconn_get_bundle(ofconn, id); - - if (!bundle) { - return OFPERR_OFPBFC_BAD_ID; - } - if (bundle->flags != flags) { - error = OFPERR_OFPBFC_BAD_FLAGS; - } else { - LIST_FOR_EACH (msg, node, &bundle->msg_list) { - /* XXX: actual commit */ - error = OFPERR_OFPBFC_MSG_FAILED; - } - } - - ofp_bundle_remove__(ofconn, bundle); - return error; -} - enum ofperr ofp_bundle_discard(struct ofconn *ofconn, uint32_t id) { @@ -156,7 +136,7 @@ ofp_bundle_discard(struct ofconn *ofconn, uint32_t id) return OFPERR_OFPBFC_BAD_ID; } - ofp_bundle_remove__(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle, false); return 0; } @@ -179,10 +159,10 @@ ofp_bundle_add_message(struct ofconn *ofconn, uint32_t id, uint16_t flags, return error; } } else if (bundle->state == BS_CLOSED) { - ofp_bundle_remove__(ofconn, bundle); + ofp_bundle_remove__(ofconn, bundle, false); return OFPERR_OFPBFC_BUNDLE_CLOSED; } else if (flags != bundle->flags) { - ofp_bundle_remove__(ofconn, 
bundle); + ofp_bundle_remove__(ofconn, bundle, false); return OFPERR_OFPBFC_BAD_FLAGS; } diff --git a/ofproto/bundles.h b/ofproto/bundles.h index b885c9b49..778cba25a 100644 --- a/ofproto/bundles.h +++ b/ofproto/bundles.h @@ -24,6 +24,8 @@ #include "connmgr.h" #include "ofp-msgs.h" #include "ofp-util.h" +#include "ofproto-provider.h" +#include "util.h" #ifdef __cplusplus extern "C" { @@ -31,12 +33,19 @@ extern "C" { struct ofp_bundle_entry { struct ovs_list node; - ovs_be32 xid; /* For error returns. */ enum ofptype type; /* OFPTYPE_FLOW_MOD or OFPTYPE_PORT_MOD. */ union { struct ofputil_flow_mod fm; /* 'fm.ofpacts' must be malloced. */ struct ofputil_port_mod pm; }; + + /* Used during commit. */ + struct rule_collection rules; /* Affected rules. */ + struct rule *rule; + bool modify; + + /* OpenFlow header and some of the message contents for error reporting. */ + struct ofp_header ofp_msg[DIV_ROUND_UP(64, sizeof(struct ofp_header))]; }; enum bundle_state { @@ -55,30 +64,32 @@ struct ofp_bundle { }; static inline struct ofp_bundle_entry *ofp_bundle_entry_alloc( - enum ofptype type, ovs_be32 xid); + enum ofptype type, const struct ofp_header *oh); static inline void ofp_bundle_entry_free(struct ofp_bundle_entry *); enum ofperr ofp_bundle_open(struct ofconn *, uint32_t id, uint16_t flags); enum ofperr ofp_bundle_close(struct ofconn *, uint32_t id, uint16_t flags); -enum ofperr ofp_bundle_commit(struct ofconn *, uint32_t id, uint16_t flags); enum ofperr ofp_bundle_discard(struct ofconn *, uint32_t id); enum ofperr ofp_bundle_add_message(struct ofconn *, uint32_t id, uint16_t flags, struct ofp_bundle_entry *); -void ofp_bundle_remove__(struct ofconn *ofconn, struct ofp_bundle *bundle); +void ofp_bundle_remove__(struct ofconn *, struct ofp_bundle *, bool success); static inline struct ofp_bundle_entry * -ofp_bundle_entry_alloc(enum ofptype type, ovs_be32 xid) +ofp_bundle_entry_alloc(enum ofptype type, const struct ofp_header *oh) { struct ofp_bundle_entry *entry = 
xmalloc(sizeof *entry); - entry->xid = xid; entry->type = type; + /* Max 64 bytes for error reporting. */ + memcpy(entry->ofp_msg, oh, MIN(ntohs(oh->length), sizeof entry->ofp_msg)); + return entry; } -static inline void ofp_bundle_entry_free(struct ofp_bundle_entry *entry) +static inline void +ofp_bundle_entry_free(struct ofp_bundle_entry *entry) { if (entry) { if (entry->type == OFPTYPE_FLOW_MOD) { diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 9851997f9..495364fe4 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1215,7 +1215,7 @@ bundle_remove_all(struct ofconn *ofconn) struct ofp_bundle *b, *next; HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { - ofp_bundle_remove__(ofconn, b); + ofp_bundle_remove__(ofconn, b, false); } } diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index f98656648..0a1d03250 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -253,9 +253,6 @@ struct flow_mod_requester { }; /* OpenFlow. */ -static enum ofperr add_flow(struct ofproto *, struct ofputil_flow_mod *, - const struct flow_mod_requester *); - static enum ofperr modify_flow_check__(struct ofproto *, struct ofputil_flow_mod *, const struct rule *) @@ -281,6 +278,15 @@ static bool ofproto_group_exists(const struct ofproto *ofproto, OVS_EXCLUDED(ofproto->groups_rwlock); static enum ofperr add_group(struct ofproto *, struct ofputil_group_mod *); static void handle_openflow(struct ofconn *, const struct ofpbuf *); +static enum ofperr do_bundle_flow_mod_begin(struct ofproto *, + struct ofputil_flow_mod *, + struct ofp_bundle_entry *) + OVS_REQUIRES(ofproto_mutex); +static void do_bundle_flow_mod_finish(struct ofproto *, + struct ofputil_flow_mod *, + const struct flow_mod_requester *, + struct ofp_bundle_entry *) + OVS_REQUIRES(ofproto_mutex); static enum ofperr handle_flow_mod__(struct ofproto *, struct ofputil_flow_mod *, const struct flow_mod_requester *) @@ -4338,6 +4344,17 @@ set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, 
cls_rule_set_conjunctions(cr, conjs, n_conjs); } +/* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT + * in which no matching flow already exists in the flow table. + * + * Adds the flow specified by 'fm', to the ofproto's flow table. Returns 0 on + * success, or an OpenFlow error code on failure. + * + * On successful return the caller must complete the operation either by + * calling add_flow_finish(), or add_flow_revert() if the operation needs to + * be reverted. + * + * The caller retains ownership of 'fm->ofpacts'. */ static enum ofperr add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule **rulep, bool *modify) @@ -4388,7 +4405,8 @@ add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, cls_rule_init(&cr, &fm->match, fm->priority); - /* Check for the existence of an identical rule. */ + /* Check for the existence of an identical rule. + * This will not return rules earlier marked as 'to_be_removed'. */ rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); if (rule) { /* Transform "add" into "modify" of an existing identical flow. */ @@ -4450,6 +4468,29 @@ add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return 0; } +/* Revert the effects of add_flow_begin(). + * 'new_rule' must be passed in as NULL, if no new rule was allocated and + * inserted to the classifier. + * Note: evictions cannot be reverted. */ +static void +add_flow_revert(struct ofproto *ofproto, struct rule *new_rule) + OVS_REQUIRES(ofproto_mutex) +{ + /* Old rule was not changed yet, only need to revert a new rule. 
*/ + if (new_rule) { + struct oftable *table = &ofproto->tables[new_rule->table_id]; + + if (!classifier_remove(&table->cls, &new_rule->cr)) { + OVS_NOT_REACHED(); + } + classifier_publish(&table->cls); + + ofproto_rule_remove__(ofproto, new_rule); + ofproto->ofproto_class->rule_delete(new_rule); + ofproto_rule_unref(new_rule); + } +} + static void add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, @@ -4489,30 +4530,6 @@ add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, send_buffered_packet(req, fm->buffer_id, rule); } - -/* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT - * in which no matching flow already exists in the flow table. - * - * Adds the flow specified by 'fm', to the ofproto's flow table. Returns 0 on - * success, or an OpenFlow error code on failure. - * - * The caller retains ownership of 'fm->ofpacts'. */ -static enum ofperr -add_flow(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule *rule; - bool modify; - enum ofperr error; - - error = add_flow_begin(ofproto, fm, &rule, &modify); - if (!error) { - add_flow_finish(ofproto, fm, req, rule, modify); - } - - return error; -} /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ @@ -4741,6 +4758,17 @@ modify_flows_begin_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return error; } +static void +modify_flows_revert(struct ofproto *ofproto, struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + /* Old rules were not changed yet, only need to revert a new rule. 
*/ + if (rules->n == 0 && rules->rules[0] != NULL) { + add_flow_revert(ofproto, rules->rules[0]); + } + rule_collection_destroy(rules); +} + static void modify_flows_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, @@ -4756,22 +4784,6 @@ modify_flows_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule_collection_destroy(rules); } -static enum ofperr -modify_flows_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_collection rules; - enum ofperr error; - - error = modify_flows_begin_loose(ofproto, fm, &rules); - if (!error) { - modify_flows_finish(ofproto, fm, req, &rules); - } - - return error; -} - /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error * code on failure. */ static enum ofperr @@ -4799,23 +4811,6 @@ modify_flow_begin_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } return error; } - -static enum ofperr -modify_flow_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_collection rules; - enum ofperr error; - - error = modify_flow_begin_strict(ofproto, fm, &rules); - if (!error) { - modify_flows_finish(ofproto, fm, req, &rules); - } - - return error; -} - /* OFPFC_DELETE implementation. 
*/ @@ -4894,6 +4889,18 @@ delete_flows_begin_loose(struct ofproto *ofproto, return error; } +static void +delete_flows_revert(struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + for (size_t i = 0; i < rules->n; i++) { + struct rule *rule = rules->rules[i]; + + CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = false; + } + rule_collection_destroy(rules); +} + static void delete_flows_finish(const struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, @@ -4904,23 +4911,6 @@ delete_flows_finish(const struct ofputil_flow_mod *fm, rule_collection_destroy(rules); } -static enum ofperr -delete_flows_loose(struct ofproto *ofproto, - const struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_collection rules; - enum ofperr error; - - error = delete_flows_begin_loose(ofproto, fm, &rules); - if (!error) { - delete_flows_finish(fm, req, &rules); - } - - return error; -} - /* Implements OFPFC_DELETE_STRICT. 
*/ static enum ofperr delete_flow_begin_strict(struct ofproto *ofproto, @@ -4950,22 +4940,6 @@ delete_flow_begin_strict(struct ofproto *ofproto, return error; } -static enum ofperr -delete_flow_strict(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_collection rules; - enum ofperr error; - - error = delete_flow_begin_strict(ofproto, fm, &rules); - if (!error) { - delete_flows_finish(fm, req, &rules); - } - - return error; -} - static void ofproto_rule_send_removed(struct rule *rule, uint8_t reason) OVS_REQUIRES(ofproto_mutex) @@ -5096,38 +5070,13 @@ handle_flow_mod__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req) OVS_EXCLUDED(ofproto_mutex) { + struct ofp_bundle_entry be; enum ofperr error; ovs_mutex_lock(&ofproto_mutex); - switch (fm->command) { - case OFPFC_ADD: - error = add_flow(ofproto, fm, req); - break; - - case OFPFC_MODIFY: - error = modify_flows_loose(ofproto, fm, req); - break; - - case OFPFC_MODIFY_STRICT: - error = modify_flow_strict(ofproto, fm, req); - break; - - case OFPFC_DELETE: - error = delete_flows_loose(ofproto, fm, req); - break; - - case OFPFC_DELETE_STRICT: - error = delete_flow_strict(ofproto, fm, req); - break; - - default: - if (fm->command > 0xff) { - VLOG_WARN_RL(&rl, "%s: flow_mod has explicit table_id but " - "flow_mod_table_id extension is not enabled", - ofproto->name); - } - error = OFPERR_OFPFMFC_BAD_COMMAND; - break; + error = do_bundle_flow_mod_begin(ofproto, fm, &be); + if (!error) { + do_bundle_flow_mod_finish(ofproto, fm, req, &be); } ofmonitor_flush(ofproto->connmgr); ovs_mutex_unlock(&ofproto_mutex); @@ -6490,13 +6439,174 @@ handle_table_mod(struct ofconn *ofconn, const struct ofp_header *oh) return table_mod(ofproto, &tm); } +static enum ofperr +do_bundle_flow_mod_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct ofp_bundle_entry *be) + 
OVS_REQUIRES(ofproto_mutex) +{ + switch (fm->command) { + case OFPFC_ADD: + return add_flow_begin(ofproto, fm, &be->rule, &be->modify); + + case OFPFC_MODIFY: + return modify_flows_begin_loose(ofproto, fm, &be->rules); + + case OFPFC_MODIFY_STRICT: + return modify_flow_begin_strict(ofproto, fm, &be->rules); + + case OFPFC_DELETE: + return delete_flows_begin_loose(ofproto, fm, &be->rules); + + case OFPFC_DELETE_STRICT: + return delete_flow_begin_strict(ofproto, fm, &be->rules); + } + + return OFPERR_OFPFMFC_BAD_COMMAND; +} + +static void +do_bundle_flow_mod_revert(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct ofp_bundle_entry *be) + OVS_REQUIRES(ofproto_mutex) +{ + switch (fm->command) { + case OFPFC_ADD: + add_flow_revert(ofproto, be->modify ? NULL : be->rule); + break; + + case OFPFC_MODIFY: + case OFPFC_MODIFY_STRICT: + modify_flows_revert(ofproto, &be->rules); + break; + + case OFPFC_DELETE: + case OFPFC_DELETE_STRICT: + delete_flows_revert(&be->rules); + break; + + default: + break; + } +} + +static void +do_bundle_flow_mod_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req, + struct ofp_bundle_entry *be) + OVS_REQUIRES(ofproto_mutex) +{ + switch (fm->command) { + case OFPFC_ADD: + add_flow_finish(ofproto, fm, req, be->rule, be->modify); + break; + + case OFPFC_MODIFY: + case OFPFC_MODIFY_STRICT: + modify_flows_finish(ofproto, fm, req, &be->rules); + break; + + case OFPFC_DELETE: + case OFPFC_DELETE_STRICT: + delete_flows_finish(fm, req, &be->rules); + break; + + default: + break; + } +} + +/* Commit phases (all while locking ofproto_mutex): + * + * 1. Gather resources - do not send any events or notifications. + * + * add: Check conflicts, check for a displaced flow. If no displaced flow + * exists, add the new flow, but mark it as "invisible". + * mod: Collect affected flows, Do not modify yet. + * del: Collect affected flows, Do not delete yet. + * + * 2a. Fail if any errors are found. 
After this point no errors are possible. + * No visible changes were made, so rollback is minimal (remove added invisible + * flows, revert 'to_be_removed' status of flows). + * + * 2b. Commit the changes + * + * add: if have displaced flow, modify it, otherwise mark the new flow as + * "visible". + * mod: Modify the collected flows. + * del: Delete the collected flows. + */ +static enum ofperr +do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) +{ + struct ofproto *ofproto = ofconn_get_ofproto(ofconn); + struct ofp_bundle *bundle; + struct ofp_bundle_entry *be; + enum ofperr error; + + bundle = ofconn_get_bundle(ofconn, id); + + if (!bundle) { + return OFPERR_OFPBFC_BAD_ID; + } + if (bundle->flags != flags) { + error = OFPERR_OFPBFC_BAD_FLAGS; + } else { + error = 0; + ovs_mutex_lock(&ofproto_mutex); + LIST_FOR_EACH (be, node, &bundle->msg_list) { + if (be->type == OFPTYPE_PORT_MOD) { + /* Not supported yet. */ + error = OFPERR_OFPBFC_MSG_FAILED; + } else if (be->type == OFPTYPE_FLOW_MOD) { + error = do_bundle_flow_mod_begin(ofproto, &be->fm, be); + } else { + OVS_NOT_REACHED(); + } + if (error) { + break; + } + } + if (error) { + /* Send error referring to the original message. */ + if (error) { + ofconn_send_error(ofconn, be->ofp_msg, error); + error = OFPERR_OFPBFC_MSG_FAILED; + } + + /* Revert all previous entries. */ + LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) { + if (be->type == OFPTYPE_FLOW_MOD) { + do_bundle_flow_mod_revert(ofproto, &be->fm, be); + } + } + } else { + /* Finish the changes. */ + LIST_FOR_EACH (be, node, &bundle->msg_list) { + if (be->type == OFPTYPE_FLOW_MOD) { + struct flow_mod_requester req = { ofconn, be->ofp_msg }; + + do_bundle_flow_mod_finish(ofproto, &be->fm, &req, be); + } + } + } + ofmonitor_flush(ofproto->connmgr); + ovs_mutex_unlock(&ofproto_mutex); + + run_rule_executes(ofproto); + } + + /* The bundle is discarded regardless of the outcome. 
*/ + ofp_bundle_remove__(ofconn, bundle, !error); + return error; +} + static enum ofperr handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) { - enum ofperr error; struct ofputil_bundle_ctrl_msg bctrl; - struct ofpbuf *buf; struct ofputil_bundle_ctrl_msg reply; + struct ofpbuf *buf; + enum ofperr error; error = reject_slave_controller(ofconn); if (error) { @@ -6507,6 +6617,10 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) if (error) { return error; } + /* Atomic updates not supported yet. */ + if (bctrl.flags & OFPBF_ATOMIC) { + return OFPERR_OFPBFC_BAD_FLAGS; + } reply.flags = 0; reply.bundle_id = bctrl.bundle_id; @@ -6520,7 +6634,7 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) reply.type = OFPBCT_CLOSE_REPLY;; break; case OFPBCT_COMMIT_REQUEST: - error = ofp_bundle_commit(ofconn, bctrl.bundle_id, bctrl.flags); + error = do_bundle_commit(ofconn, bctrl.bundle_id, bctrl.flags); reply.type = OFPBCT_COMMIT_REPLY; break; case OFPBCT_DISCARD_REQUEST: @@ -6562,7 +6676,7 @@ handle_bundle_add(struct ofconn *ofconn, const struct ofp_header *oh) return error; } - bmsg = ofp_bundle_entry_alloc(type, badd.msg->xid); + bmsg = ofp_bundle_entry_alloc(type, badd.msg); if (type == OFPTYPE_PORT_MOD) { error = ofputil_decode_port_mod(badd.msg, &bmsg->pm, false); diff --git a/tests/ofproto.at b/tests/ofproto.at index f4e5321d0..9729a7c81 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -3227,13 +3227,13 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Send an OpenFlow14 message (05), OFPT_BUNDLE_CONTROL (21), length (10), xid (0a) -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [], [dnl send: 
OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=OPEN_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): @@ -3251,23 +3251,23 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Send twice an OpenFlow14 message (05), OFPT_BUNDLE_CONTROL (21), length (10), xid (0a) -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=OPEN_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_ERROR (OF1.4): OFPBFC_BAD_ID OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3282,16 +3282,16 @@ OVS_VSWITCHD_START ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2>&1 AT_CAPTURE_FILE([monitor.log]) -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST 
flags=atomic + bundle_id=0x1 type=CLOSE_REQUEST flags=ordered OFPT_ERROR (OF1.4): OFPBFC_BAD_ID OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=atomic + bundle_id=0x1 type=CLOSE_REQUEST flags=ordered OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3307,30 +3307,30 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Open, Close, Close -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=OPEN_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=atomic + bundle_id=0x1 type=CLOSE_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=CLOSE_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=atomic + bundle_id=0x1 type=CLOSE_REQUEST flags=ordered OFPT_ERROR (OF1.4): OFPBFC_BUNDLE_CLOSED OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=atomic + bundle_id=0x1 type=CLOSE_REQUEST flags=ordered OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3346,23 +3346,23 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile 
>monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Open, Close, Close -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 02" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 02 00 01" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=OPEN_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=ordered + bundle_id=0x1 type=CLOSE_REQUEST flags=atomic OFPT_ERROR (OF1.4): OFPBFC_BAD_FLAGS OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=CLOSE_REQUEST flags=ordered + bundle_id=0x1 type=CLOSE_REQUEST flags=atomic OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3378,16 +3378,16 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Open, Close, Close -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 04 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 04 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=COMMIT_REQUEST flags=atomic + bundle_id=0x1 type=COMMIT_REQUEST flags=ordered OFPT_ERROR (OF1.4): OFPBFC_BAD_ID OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=COMMIT_REQUEST flags=atomic + bundle_id=0x1 type=COMMIT_REQUEST flags=ordered OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3403,23 +3403,23 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 
AT_CAPTURE_FILE([monitor.log]) # Open, Close, Close -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 00 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 04 00 02" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 04 00 01" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=OPEN_REQUEST flags=atomic + bundle_id=0x1 type=OPEN_REQUEST flags=ordered OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0x1 type=OPEN_REPLY flags=0 OFPT_BARRIER_REPLY (OF1.4): send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=COMMIT_REQUEST flags=ordered + bundle_id=0x1 type=COMMIT_REQUEST flags=atomic OFPT_ERROR (OF1.4): OFPBFC_BAD_FLAGS OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=COMMIT_REQUEST flags=ordered + bundle_id=0x1 type=COMMIT_REQUEST flags=atomic OFPT_BARRIER_REPLY (OF1.4): ]) @@ -3435,16 +3435,16 @@ ovs-ofctl -O OpenFlow14 monitor br0 --detach --no-chdir --pidfile >monitor.log 2 AT_CAPTURE_FILE([monitor.log]) # Open, Close, Close -ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 06 00 01" +ovs-appctl -t ovs-ofctl ofctl/send "05 21 00 10 00 00 00 0a 00 00 00 01 00 06 00 02" ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([ofctl_strip < monitor.log], [0], [dnl send: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=DISCARD_REQUEST flags=atomic + bundle_id=0x1 type=DISCARD_REQUEST flags=ordered OFPT_ERROR (OF1.4): OFPBFC_BAD_ID OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0x1 type=DISCARD_REQUEST flags=atomic + bundle_id=0x1 type=DISCARD_REQUEST flags=ordered OFPT_BARRIER_REPLY (OF1.4): ]) From 1734bf29a37e746b79689827e587b68248ddc834 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 18 May 2015 17:17:41 
-0700 Subject: [PATCH 102/146] metaflow: Convert hex parsing to use new utility functions. We now have functions that can do parsing and printing of long hex strings, so we should use them for meta flow fields to ensure consistent behavior. Since these functions can handle infinitely long strings, we can also increase the maximum field size for MFS_HEXADECIMAL types to the limit allowed by NXM/OXM. This is useful for future large fields, such as Geneve options. Signed-off-by: Jesse Gross Acked-by: Andy Zhou --- build-aux/extract-ofp-fields | 20 ++++++------ lib/meta-flow.c | 62 +++++++++++++++--------------------- 2 files changed, 35 insertions(+), 47 deletions(-) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index 315552d12..ca2ca044e 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -21,16 +21,16 @@ TYPES = {"u8": 1, "be64": 8, "IPv6": 16} -FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), - "hexadecimal": ("MFS_HEXADECIMAL", 1, 8), - "Ethernet": ("MFS_ETHERNET", 6, 6), - "IPv4": ("MFS_IPV4", 4, 4), - "IPv6": ("MFS_IPV6", 16, 16), - "OpenFlow 1.0 port": ("MFS_OFP_PORT", 2, 2), - "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4, 4), - "frag": ("MFS_FRAG", 1, 1), - "tunnel flags": ("MFS_TNL_FLAGS", 2, 2), - "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} +FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), + "hexadecimal": ("MFS_HEXADECIMAL", 1, 127), + "Ethernet": ("MFS_ETHERNET", 6, 6), + "IPv4": ("MFS_IPV4", 4, 4), + "IPv6": ("MFS_IPV6", 16, 16), + "OpenFlow 1.0 port": ("MFS_OFP_PORT", 2, 2), + "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4, 4), + "frag": ("MFS_FRAG", 1, 1), + "tunnel flags": ("MFS_TNL_FLAGS", 2, 2), + "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} PREREQS = {"none": "MFP_NONE", "ARP": "MFP_ARP", diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 757843dfb..3bdca62be 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -1684,39 +1684,35 @@ static char * mf_from_integer_string(const struct mf_field *mf, const char *s, 
uint8_t *valuep, uint8_t *maskp) { - unsigned long long int integer, mask; char *tail; - int i; + const char *err_str = ""; + int err; - errno = 0; - integer = strtoull(s, &tail, 0); - if (errno || (*tail != '\0' && *tail != '/')) { + err = parse_int_string(s, valuep, mf->n_bytes, &tail); + if (err || (*tail != '\0' && *tail != '/')) { + err_str = "value"; goto syntax_error; } if (*tail == '/') { - mask = strtoull(tail + 1, &tail, 0); - if (errno || *tail != '\0') { + err = parse_int_string(tail + 1, maskp, mf->n_bytes, &tail); + if (err || *tail != '\0') { + err_str = "mask"; goto syntax_error; } } else { - mask = ULLONG_MAX; + memset(maskp, 0xff, mf->n_bytes); } - for (i = mf->n_bytes - 1; i >= 0; i--) { - valuep[i] = integer; - maskp[i] = mask; - integer >>= 8; - mask >>= 8; - } - if (integer) { - return xasprintf("%s: value too large for %u-byte field %s", - s, mf->n_bytes, mf->name); - } return NULL; syntax_error: - return xasprintf("%s: bad syntax for %s", s, mf->name); + if (err == ERANGE) { + return xasprintf("%s: %s too large for %u-byte field %s", + s, err_str, mf->n_bytes, mf->name); + } else { + return xasprintf("%s: bad syntax for %s %s", s, mf->name, err_str); + } } static char * @@ -2111,33 +2107,25 @@ static void mf_format_integer_string(const struct mf_field *mf, const uint8_t *valuep, const uint8_t *maskp, struct ds *s) { - unsigned long long int integer; - int i; - - ovs_assert(mf->n_bytes <= 8); - - integer = 0; - for (i = 0; i < mf->n_bytes; i++) { - integer = (integer << 8) | valuep[i]; - } if (mf->string == MFS_HEXADECIMAL) { - ds_put_format(s, "%#llx", integer); + ds_put_hex(s, valuep, mf->n_bytes); } else { + unsigned long long int integer = 0; + int i; + + ovs_assert(mf->n_bytes <= 8); + for (i = 0; i < mf->n_bytes; i++) { + integer = (integer << 8) | valuep[i]; + } ds_put_format(s, "%lld", integer); } if (maskp) { - unsigned long long int mask; - - mask = 0; - for (i = 0; i < mf->n_bytes; i++) { - mask = (mask << 8) | maskp[i]; - } - /* I 
guess we could write the mask in decimal for MFS_DECIMAL but I'm * not sure that that a bit-mask written in decimal is ever easier to * understand than the same bit-mask written in hexadecimal. */ - ds_put_format(s, "/%#llx", mask); + ds_put_char(s, '/'); + ds_put_hex(s, maskp, mf->n_bytes); } } From a42023ee8d6c45395626bf37754202e85f3cd904 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 1 Jun 2015 19:07:13 -0700 Subject: [PATCH 103/146] lib/util.c: style fixes Convert tabs into spaces. Found by inspection. Signed-off-by: Andy Zhou Acked-by: Jesse Gross --- lib/util.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/util.c b/lib/util.c index c7e2b77f5..947398507 100644 --- a/lib/util.c +++ b/lib/util.c @@ -651,11 +651,11 @@ str_to_uint(const char *s, int base, unsigned int *u) long long ll; bool ok = str_to_llong(s, base, &ll); if (!ok || ll < 0 || ll > UINT_MAX) { - *u = 0; - return false; + *u = 0; + return false; } else { - *u = ll; - return true; + *u = ll; + return true; } } @@ -799,7 +799,7 @@ parse_int_string(const char *s, uint8_t *valuep, int field_width, char **tail) free: free(hexit_str); - return err; + return err; } errno = 0; From 7762f7c39a8f5f115427b598d9e768f9336af466 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Mon, 1 Jun 2015 12:26:40 +0900 Subject: [PATCH 104/146] Update my email address Signed-off-by: YAMAMOTO Takashi Acked-by: Justin Pettit --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index a2b40a733..f15510571 100644 --- a/AUTHORS +++ b/AUTHORS @@ -179,7 +179,7 @@ Vivien Bernet-Rollande vbr@soprive.net Wang Sheng-Hui shhuiw@gmail.com Wei Yongjun yjwei@cn.fujitsu.com William Fulton -YAMAMOTO Takashi yamamoto@valinux.co.jp +YAMAMOTO Takashi yamamoto@midokura.com Yasuhito Takamiya yasuhito@gmail.com yinpeijun yinpeijun@huawei.com Yu Zhiguo yuzg@cn.fujitsu.com From 3bcc10c0701c241ef62bdb32c5d21c060ad7590b Mon Sep 17 00:00:00 2001 From: Daniele Di
Proietto Date: Wed, 3 Jun 2015 15:55:16 +0100 Subject: [PATCH 105/146] dpif-netdev: Fix non-pmd thread queue id. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non pmd threads have a core_id == UINT32_MAX, while queue ids used by netdevs range from 0 to the number of CPUs. Therefore core ids cannot be used directly to select a queue. This commit introduces a simple mapping to fix the problem: pmd threads continue using queues 0 to N (where N is the number of CPUs in the system), while non pmd threads use queue N+1. Fixes: d5c199ea7ff7 ("netdev-dpdk: Properly support non pmd threads.") Reported-by: 차은호 Signed-off-by: Mark D. Gray Signed-off-by: Ethan Jackson Acked-by: Flavio Leitner Acked-by: Ethan Jackson --- lib/dpif-netdev.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 76d100335..7df95239d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -415,6 +415,8 @@ struct dp_netdev_pmd_thread { /* threads on same numa node. */ unsigned core_id; /* CPU core id of this pmd thread. */ int numa_id; /* numa node id of this pmd thread. */ + int tx_qid; /* Queue id used by this pmd thread to + * send packets on all netdevs */ /* Only a pmd thread can write on its own 'cycles' and 'stats'. * The main thread keeps 'stats_zero' and 'cycles_zero' as base @@ -1067,8 +1069,9 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, return ENOENT; } /* There can only be ovs_numa_get_n_cores() pmd threads, - * so creates a txq for each. */ - error = netdev_set_multiq(netdev, n_cores, dp->n_dpdk_rxqs); + * so creates a txq for each, and one extra for the non + * pmd threads. 
*/ + error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs); if (error && (error != EOPNOTSUPP)) { VLOG_ERR("%s, cannot set multiq", devname); return errno; @@ -2402,7 +2405,8 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) } /* Sets the new rx queue config. */ - err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores(), + err = netdev_set_multiq(port->netdev, + ovs_numa_get_n_cores() + 1, n_rxqs); if (err && (err != EOPNOTSUPP)) { VLOG_ERR("Failed to set dpdk interface %s rx_queue to:" @@ -2806,6 +2810,16 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos) return next; } +static int +core_id_to_qid(unsigned core_id) +{ + if (core_id != NON_PMD_CORE_ID) { + return core_id; + } else { + return ovs_numa_get_n_cores(); + } +} + /* Configures the 'pmd' based on the input argument. */ static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, @@ -2814,6 +2828,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, pmd->dp = dp; pmd->index = index; pmd->core_id = core_id; + pmd->tx_qid = core_id_to_qid(core_id); pmd->numa_id = numa_id; ovs_refcount_init(&pmd->ref_cnt); @@ -3329,7 +3344,7 @@ dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb, } static void -dp_netdev_drop_packets(struct dp_packet ** packets, int cnt, bool may_steal) +dp_netdev_drop_packets(struct dp_packet **packets, int cnt, bool may_steal) { if (may_steal) { int i; @@ -3387,7 +3402,7 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt, case OVS_ACTION_ATTR_OUTPUT: p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a))); if (OVS_LIKELY(p)) { - netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal); + netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal); return; } break; From 6e5879655a76c5798ea41ac9e8af72683673f25c Mon Sep 17 00:00:00 2001 From: Mijo Safradin Date: Wed, 27 May 2015 10:34:31 +0200 Subject: [PATCH 106/146] 
ovs-vswitchd: Update documentation Commit 7a6cf343a410d77e05ebd7bf5b5ade52803879ae raised the MAXFD limit from 7500 to 65535. Signed-off-by: Mijo Safradin Signed-off-by: Ben Pfaff --- vswitchd/ovs-vswitchd.8.in | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index b9eb00461..49c2a409d 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -291,12 +291,13 @@ We believe these limits to be accurate as of this writing. These limits assume the use of the Linux kernel datapath. . .IP \(bu -\fBovs\-vswitchd\fR started through \fBovs\-ctl\fR(8) provides a limit of 7500 +\fBovs\-vswitchd\fR started through \fBovs\-ctl\fR(8) provides a limit of 65535 file descriptors. The limits on the number of bridges and ports is decided by the availability of file descriptors. With the Linux kernel datapath, creation -of a single bridge consumes 3 file descriptors and adding a port consumes -1 file descriptor. Performance will degrade beyond 1,024 ports per bridge due -to fixed hash table sizing. Other platforms may have different limitations. +of a single bridge consumes three file descriptors and adding a port consumes +"n-handler-threads" file descriptors per bridge port. Performance will degrade +beyond 1,024 ports per bridge due to fixed hash table sizing. Other platforms +may have different limitations. . .IP \(bu 2,048 MAC learning entries per bridge, by default. (This is From a216c3bd7e9143f6be47fde1c06625b55c87b640 Mon Sep 17 00:00:00 2001 From: Alin Serdean Date: Fri, 5 Jun 2015 18:39:37 +0000 Subject: [PATCH 107/146] Link library updates for appveyor Add the library iphlpapi to the appveyor.yml build script. 
Signed-off-by: Alin Gabriel Serdean Signed-off-by: Gurucharan Shetty --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index a14f0fc50..863b5616e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,5 +39,5 @@ build_script: - C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/." - C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=-lws2_32 --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudioddk=\"Win8.1 Debug\"" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudioddk=\"Win8.1 Debug\"" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make" From 332eafce794d8c6d10474d3c802ef886a05a453c Mon Sep 17 00:00:00 2001 From: Gurucharan Shetty Date: Fri, 5 Jun 2015 11:37:52 -0700 Subject: [PATCH 108/146] appveyor: Add a newer ssl link. The older version is no longer available for download. 
Signed-off-by: Gurucharan Shetty Acked-by: Alin Serdean --- appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 863b5616e..ebd937bf1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -15,9 +15,9 @@ init: Invoke-WebRequest $source -OutFile $destination - $source = "http://slproweb.com/download/Win32OpenSSL-1_0_1L.exe" + $source = "https://slproweb.com/download/Win32OpenSSL-1_0_2a.exe" - $destination = "C:\ovs-build-downloads\Win32OpenSSL-1_0_1L.exe" + $destination = "C:\ovs-build-downloads\Win32OpenSSL-1_0_2a.exe" Invoke-WebRequest $source -OutFile $destination @@ -27,7 +27,7 @@ init: cd C:\ovs-build-downloads - .\Win32OpenSSL-1_0_1L.exe /silent /verysilent /sp- /suppressmsgboxes + .\Win32OpenSSL-1_0_2a.exe /silent /verysilent /sp- /suppressmsgboxes Start-Sleep -s 30 From ea0797c929dc663ed443f07cd0bfc9d788d11367 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 5 Jun 2015 08:13:28 -0700 Subject: [PATCH 109/146] ofproto-dpif: Avoid creating OpenFlow ports for duplicate tunnels. Until now, when two tunnels had an identical configuration, both of them were assigned OpenFlow ports, but only one of those OpenFlow ports was functional. With this commit, only one of the two (or more) identically configured tunnels will be assigned an OpenFlow port number. 
Reported-by: Keith Holleman Signed-off-by: Ben Pfaff Co-authored-by: Andy Zhou Signed-off-by: Andy Zhou --- AUTHORS | 1 + ofproto/ofproto-dpif.c | 10 ++++++++-- ofproto/tunnel.c | 14 ++++++++++---- ofproto/tunnel.h | 6 +++--- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/AUTHORS b/AUTHORS index f15510571..28899a885 100644 --- a/AUTHORS +++ b/AUTHORS @@ -277,6 +277,7 @@ Joan Cirer joan@ev0.net John Darrington john@darrington.wattle.id.au John Galgay john@galgay.net John Hurley john.hurley@netronome.com +Keith Holleman hollemanietf@gmail.com K 華 k940545@hotmail.com Kevin Mancuso kevin.mancuso@rackspace.com Kiran Shanbhog kiran@vmware.com diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index c1daa1dc1..bd4530567 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1671,8 +1671,14 @@ port_construct(struct ofport *port_) if (netdev_get_tunnel_config(netdev)) { atomic_count_inc(&ofproto->backer->tnl_count); - tnl_port_add(port, port->up.netdev, port->odp_port, - ovs_native_tunneling_is_on(ofproto), namebuf); + error = tnl_port_add(port, port->up.netdev, port->odp_port, + ovs_native_tunneling_is_on(ofproto), namebuf); + if (error) { + atomic_count_dec(&ofproto->backer->tnl_count); + dpif_port_destroy(&dpif_port); + return error; + } + port->is_tunnel = true; if (ofproto->ipfix) { dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port); diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 3ea0eb44f..d2ac7c6a8 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2013, 2014 Nicira, Inc. +/* Copyright (c) 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -203,14 +203,20 @@ tnl_port_add__(const struct ofport_dpif *ofport, const struct netdev *netdev, /* Adds 'ofport' to the module with datapath port number 'odp_port'. 
'ofport's * must be added before they can be used by the module. 'ofport' must be a - * tunnel. */ -void + * tunnel. + * + * Returns 0 if successful, otherwise a positive errno value. */ +int tnl_port_add(const struct ofport_dpif *ofport, const struct netdev *netdev, odp_port_t odp_port, bool native_tnl, const char name[]) OVS_EXCLUDED(rwlock) { + bool ok; + fat_rwlock_wrlock(&rwlock); - tnl_port_add__(ofport, netdev, odp_port, true, native_tnl, name); + ok = tnl_port_add__(ofport, netdev, odp_port, true, native_tnl, name); fat_rwlock_unlock(&rwlock); + + return ok ? 0 : EEXIST; } /* Checks if the tunnel represented by 'ofport' reconfiguration due to changes diff --git a/ofproto/tunnel.h b/ofproto/tunnel.h index 6181762b2..b8415abb2 100644 --- a/ofproto/tunnel.h +++ b/ofproto/tunnel.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013 Nicira, Inc. +/* Copyright (c) 2013, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,8 @@ void ofproto_tunnel_init(void); bool tnl_port_reconfigure(const struct ofport_dpif *, const struct netdev *, odp_port_t, bool native_tnl, const char name[]); -void tnl_port_add(const struct ofport_dpif *, const struct netdev *, - odp_port_t odp_port, bool native_tnl, const char name[]); +int tnl_port_add(const struct ofport_dpif *, const struct netdev *, + odp_port_t odp_port, bool native_tnl, const char name[]); void tnl_port_del(const struct ofport_dpif *); const struct ofport_dpif *tnl_port_receive(const struct flow *); From 3da29e32942e9ad17bb8b37275eed38f78ed9fba Mon Sep 17 00:00:00 2001 From: Sabyasachi Sengupta Date: Fri, 5 Jun 2015 22:14:37 -0700 Subject: [PATCH 110/146] ofproto-dpif: Use xzalloc instead of xmalloc. Use xzalloc instead of xmalloc for some key structure allocations in ofproto-dpif (viz. ofproto_dpif, ofport_dpif and rule_dpif) so as to prevent uninitialized values in these structures. 
Signed-off-by: Sabyasachi Sengupta Signed-off-by: Ben Pfaff --- ofproto/ofproto-dpif.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index bd4530567..22e5d5f34 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -812,7 +812,7 @@ static int add_internal_flows(struct ofproto_dpif *); static struct ofproto * alloc(void) { - struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto); + struct ofproto_dpif *ofproto = xzalloc(sizeof *ofproto); return &ofproto->up; } @@ -1608,7 +1608,7 @@ query_tables(struct ofproto *ofproto, static struct ofport * port_alloc(void) { - struct ofport_dpif *port = xmalloc(sizeof *port); + struct ofport_dpif *port = xzalloc(sizeof *port); return &port->up; } @@ -3882,7 +3882,7 @@ static struct rule_dpif *rule_dpif_cast(const struct rule *rule) static struct rule * rule_alloc(void) { - struct rule_dpif *rule = xmalloc(sizeof *rule); + struct rule_dpif *rule = xzalloc(sizeof *rule); return &rule->up; } From d6943394576b1cfc69929dbde2513617a46a4e10 Mon Sep 17 00:00:00 2001 From: "Thomas F. Herbert" Date: Tue, 2 Jun 2015 13:55:28 -0400 Subject: [PATCH 111/146] Add support functions for 802.1ad push and pop vlan. Changes to allow the tpid to be specified and all vlan tpid checking to be generalized. Signed-off-by: Thomas F Herbert Signed-off-by: Ben Pfaff --- AUTHORS | 1 + lib/odp-execute.c | 2 +- lib/packets.c | 8 ++++---- lib/packets.h | 7 +++++++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/AUTHORS b/AUTHORS index 28899a885..24e854de7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -166,6 +166,7 @@ SUGYO Kazushi sugyo.org@gmail.com Tadaaki Nagao nagao@stratosphere.co.jp Terry Wilson twilson@redhat.com Tetsuo NAKAGAWA nakagawa@mxc.nes.nec.co.jp +Thomas F.
Herbert thomasfherbert@gmail.com Thomas Goirand zigo@debian.org Thomas Graf tgraf@noironetworks.com Thomas Lacroix thomas.lacroix@citrix.com diff --git a/lib/odp-execute.c b/lib/odp-execute.c index b7851048b..c67645100 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -555,7 +555,7 @@ odp_execute_actions(void *dp, struct dp_packet **packets, int cnt, bool steal, const struct ovs_action_push_vlan *vlan = nl_attr_get(a); for (i = 0; i < cnt; i++) { - eth_push_vlan(packets[i], htons(ETH_TYPE_VLAN), vlan->vlan_tci); + eth_push_vlan(packets[i], vlan->vlan_tpid, vlan->vlan_tci); } break; } diff --git a/lib/packets.c b/lib/packets.c index 016b12bd4..965754fed 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -192,15 +192,15 @@ eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci) /* Removes outermost VLAN header (if any is present) from 'packet'. * - * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header - * or may be NULL if there are no MPLS headers. */ + * 'packet->l2_5' should initially point to 'packet''s outer-most VLAN header + * or may be NULL if there are no VLAN headers. 
*/ void eth_pop_vlan(struct dp_packet *packet) { struct vlan_eth_header *veh = dp_packet_l2(packet); if (veh && dp_packet_size(packet) >= sizeof *veh - && veh->veth_type == htons(ETH_TYPE_VLAN)) { + && eth_type_vlan(veh->veth_type)) { memmove((char *)veh + VLAN_HEADER_LEN, veh, 2 * ETH_ADDR_LEN); dp_packet_resize_l2(packet, -VLAN_HEADER_LEN); @@ -217,7 +217,7 @@ set_ethertype(struct dp_packet *packet, ovs_be16 eth_type) return; } - if (eh->eth_type == htons(ETH_TYPE_VLAN)) { + if (eth_type_vlan(eh->eth_type)) { ovs_be16 *p; char *l2_5 = dp_packet_l2_5(packet); diff --git a/lib/packets.h b/lib/packets.h index b146a5069..e22267efc 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -255,6 +255,13 @@ static inline bool eth_type_mpls(ovs_be16 eth_type) eth_type == htons(ETH_TYPE_MPLS_MCAST); } +static inline bool eth_type_vlan(ovs_be16 eth_type) +{ + return eth_type == htons(ETH_TYPE_VLAN_8021Q) || + eth_type == htons(ETH_TYPE_VLAN_8021AD); +} + + /* Minimum value for an Ethernet type. Values below this are IEEE 802.2 frame * lengths. */ #define ETH_TYPE_MIN 0x600 From f171fa88765ea7730f45cabdb5a4f31b7414c97a Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Sun, 7 Jun 2015 11:38:52 -0700 Subject: [PATCH 112/146] odp-util: Make sure vlan tci mask has exact match for VLAN_CFI. OVS datapath has check which prevents the installation of flow that matches VLAN TCI but does not have exact match for VLAN_CFI bit. To follow this rule, ovs userspace must make sure the flow key for datapath flow matching VLAN TCI has exact match for VLAN_CFI bit. Before this commit, this is not enforced, so OpenFlow flow like "vlan_tci=0x000a/0x0fff,action=output:local" can generate datapath flow like "vlan(vid=10/0xfff,pcp=0/0x0,cfi=1/0)". With the OVS datapath check, the installation of such datapath flow will be rejected with: "|WARN|system@ovs-system: failed to put[create][modify] (Invalid argument)" This commit makes ovs userspace always exact match the VLAN_CFI bit if the flow matches VLAN TCI. 
Reported-by: Ronald Lee Signed-off-by: Alex Wang Acked-by: Ben Pfaff Acked-by: Jarno Rajahalme --- ofproto/ofproto-dpif-xlate.c | 4 ++++ tests/ofproto-dpif.at | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 71b8beffc..6bb85188f 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -5033,6 +5033,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) wc->masks.tp_src &= htons(UINT8_MAX); wc->masks.tp_dst &= htons(UINT8_MAX); } + /* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */ + if (wc->masks.vlan_tci) { + wc->masks.vlan_tci |= htons(VLAN_CFI); + } } } diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index b5a9ad917..f9015c789 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -6673,3 +6673,19 @@ icmp6,vlan_tci=0x0000,dl_src=00:00:86:05:80:da,dl_dst=00:60:97:07:69:ea,ipv6_src OVS_VSWITCHD_STOP AT_CLEANUP +# Tests the exact match of CFI bit in installed datapath flows matching VLAN. 
+AT_SETUP([ofproto-dpif - vlan matching]) +OVS_VSWITCHD_START( + [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1]) +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) + +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flow br0 "vlan_tci=0x000a/0x0fff,action=output:local"]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x8100),vlan(vid=10,pcp=0),encap(eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0))']) + +AT_CHECK([cat ovs-vswitchd.log | grep 'in_port=[[1]]' | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl +recirc_id=0,ip,in_port=1,dl_vlan=10,nw_frag=no, actions: +]) +OVS_VSWITCHD_STOP +AT_CLEANUP From 53d9f3838f7d57b6baa327e6835287f44637b508 Mon Sep 17 00:00:00 2001 From: Zang MingJie Date: Sun, 7 Jun 2015 13:21:18 -0700 Subject: [PATCH 113/146] ovs-appctl-bashcomp: bash_completion shouldn't modify user environment. Signed-off-by: Zang MingJie Acked-by: Alex Wang --- utilities/ovs-appctl-bashcomp.bash | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/utilities/ovs-appctl-bashcomp.bash b/utilities/ovs-appctl-bashcomp.bash index 67a268eda..f7fb83047 100755 --- a/utilities/ovs-appctl-bashcomp.bash +++ b/utilities/ovs-appctl-bashcomp.bash @@ -598,9 +598,6 @@ _ovs_command_complete() { return 0 } -# Needed for the sorting of completions in display. -export LC_ALL=C - # Debug mode. if [ "$1" = "debug" ]; then shift @@ -624,4 +621,4 @@ else complete -F _ovs_command_complete ovs-ofctl complete -F _ovs_command_complete ovs-dpctl complete -F _ovs_command_complete ovsdb-tool -fi \ No newline at end of file +fi From d8485a90af4ca1448ed7e33354994c97785c851d Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Sun, 7 Jun 2015 13:31:23 -0700 Subject: [PATCH 114/146] tests: Fix unit test failure. 
This commit fixes the unit test failure caused by commit f171fa8 (odp-util: Make sure vlan tci mask has exact match for VLAN_CFI.). Signed-off-by: Alex Wang Acked-by: Ben Pfaff --- tests/ofproto-dpif.at | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index f9015c789..76f2c86a3 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -6088,8 +6088,8 @@ done sleep 1 dnl The original flow is missing due to a revalidation. AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl -recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x0fff,dl_src=50:54:00:00:00:09,nw_frag=no, actions: -recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x0fff,dl_src=50:54:00:00:00:0b,nw_frag=no, actions: +recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,nw_frag=no, actions: +recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,nw_frag=no, actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP From d183efc22b2b0b84c341500a73f567f98f4d00f4 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Sat, 6 Jun 2015 01:06:30 +0300 Subject: [PATCH 115/146] This commit adds the windows installer to the OVS tree.
Requirements are the following: Visual Studio Community 2013 WiX Toolset 3.9 Microsoft_VC120_CRT_x86.msm More detailed information on the requirements and build instructions can be found under: https://github.com/cloudbase/ovs-windows-installer/blob/master/README.rst To run and make the installer issue the following: ./boot.sh ./configure CC=./build-aux/cccl LD="`which link`" \ LIBS="-lws2_32 -liphlpapi" --prefix="C:/openvswitch/usr" \ --localstatedir="C:/openvswitch/var" --sysconfdir="C:/openvswitch/etc" \ --with-pthread="C:/pthread" --with-vstudiotarget="Release" make clean && make -j16 windows_installer To uninstall one could use the following PowerShell cmdlets: $app = Get-WmiObject -Class Win32_Product | Where-Object ` { $_.Name -match "Open Vswitch" } $app.Uninstall() Signed-off-by: Alin Gabriel Serdean Co-authored-by: Alessandro Pilotti Signed-off-by: Alessandro Pilotti Signed-off-by: Gurucharan Shetty --- INSTALL.Windows.md | 8 +- Makefile.am | 7 +- appveyor.yml | 2 +- datapath-windows/ovsext.sln | 50 ++-- m4/openvswitch.m4 | 16 +- windows/.gitignore | 191 +++++++++++++ windows/README.rst | 48 ++++ windows/automake.mk | 57 ++++ windows/ovs-windows-installer.sln | 19 ++ .../Actions/OVSActions.js | 258 ++++++++++++++++++ .../ovs-windows-installer/Binaries/.gitignore | 3 + .../ovs-windows-installer/CustomActions.wxs | 67 +++++ .../Dialogs/BeginningDialog.wxs | 49 ++++ .../Dialogs/MyEndDialog.wxs | 44 +++ .../Dialogs/MyTroubleshootDialog.wxs | 42 +++ .../Dialogs/UserFinishDialog.wxs | 44 +++ .../ovs-windows-installer/Driver/.gitignore | 5 + .../Driver/Win8.1/.gitignore | 3 + .../Driver/Win8/.gitignore | 3 + windows/ovs-windows-installer/License.rtf | 209 ++++++++++++++ windows/ovs-windows-installer/Product.wxs | 249 +++++++++++++++++ .../ovs-windows-installer/Redist/.gitignore | 3 + .../ovs-windows-installer/Services/.gitignore | 3 + .../ovs-windows-installer/Symbols/.gitignore | 3 + windows/ovs-windows-installer/UI.wxs | 72 +++++
.../ovs-windows-installer/images/bannrbmp.bmp | Bin 0 -> 134958 bytes .../ovs-windows-installer/images/dlgbmp.bmp | Bin 0 -> 460568 bytes .../ovs-windows-installer.wixproj | 79 ++++++ 28 files changed, 1493 insertions(+), 41 deletions(-) create mode 100644 windows/.gitignore create mode 100644 windows/README.rst create mode 100644 windows/automake.mk create mode 100644 windows/ovs-windows-installer.sln create mode 100644 windows/ovs-windows-installer/Actions/OVSActions.js create mode 100644 windows/ovs-windows-installer/Binaries/.gitignore create mode 100644 windows/ovs-windows-installer/CustomActions.wxs create mode 100644 windows/ovs-windows-installer/Dialogs/BeginningDialog.wxs create mode 100644 windows/ovs-windows-installer/Dialogs/MyEndDialog.wxs create mode 100644 windows/ovs-windows-installer/Dialogs/MyTroubleshootDialog.wxs create mode 100644 windows/ovs-windows-installer/Dialogs/UserFinishDialog.wxs create mode 100644 windows/ovs-windows-installer/Driver/.gitignore create mode 100644 windows/ovs-windows-installer/Driver/Win8.1/.gitignore create mode 100644 windows/ovs-windows-installer/Driver/Win8/.gitignore create mode 100644 windows/ovs-windows-installer/License.rtf create mode 100644 windows/ovs-windows-installer/Product.wxs create mode 100644 windows/ovs-windows-installer/Redist/.gitignore create mode 100644 windows/ovs-windows-installer/Services/.gitignore create mode 100644 windows/ovs-windows-installer/Symbols/.gitignore create mode 100644 windows/ovs-windows-installer/UI.wxs create mode 100644 windows/ovs-windows-installer/images/bannrbmp.bmp create mode 100644 windows/ovs-windows-installer/images/dlgbmp.bmp create mode 100644 windows/ovs-windows-installer/ovs-windows-installer.wixproj diff --git a/INSTALL.Windows.md b/INSTALL.Windows.md index 0ec0af0b6..3171e47d7 100644 --- a/INSTALL.Windows.md +++ b/INSTALL.Windows.md @@ -129,17 +129,17 @@ You can open the extensions.sln file in the IDE and build the solution. 
* The kernel datapath can be compiled from command line as well. The top level 'make' will invoke building the kernel datapath, if the -'--with-vstudioddk' argument is specified while configuring the package. +'--with-vstudiotarget' argument is specified while configuring the package. For example, % ./configure CC=./build-aux/cccl LD="`which link`" \ LIBS="-lws2_32 -liphlpapi" --prefix="C:/openvswitch/usr" \ --localstatedir="C:/openvswitch/var" --sysconfdir="C:/openvswitch/etc" \ --with-pthread="C:/pthread" --enable-ssl \ - --with-openssl="C:/OpenSSL-Win32" --with-vstudioddk="" + --with-openssl="C:/OpenSSL-Win32" --with-vstudiotarget="" - Possible values for "" are: - "Win8.1 Debug", "Win8.1 Release", "Win8 Debug" and "Win8 Release". + Possible values for "" are: + "Debug" and "Release" Installing the Kernel module ---------------------------- diff --git a/Makefile.am b/Makefile.am index 8bc431bfc..59a14667d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -335,11 +335,13 @@ CLEANFILES += manpage-dep-check if VSTUDIO_DDK ALL_LOCAL += ovsext_make ovsext_make: datapath-windows/ovsext.sln - MSBuild.exe datapath-windows/ovsext.sln /target:Build /property:Configuration="$(VSTUDIO_CONFIG)" + MSBuild.exe datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8$(VSTUDIO_CONFIG)" + MSBuild.exe datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" CLEAN_LOCAL += ovsext_clean ovsext_clean: datapath-windows/ovsext.sln - MSBuild.exe datapath-windows/ovsext.sln /target:Clean /property:Configuration="$(VSTUDIO_CONFIG)" + MSBuild.exe datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8$(VSTUDIO_CONFIG)" + MSBuild.exe datapath-windows/ovsext.sln /target:Clean /property:Configuration="Win8.1$(VSTUDIO_CONFIG)" endif dist-hook: $(DIST_HOOKS) @@ -377,3 +379,4 @@ include tutorial/automake.mk include vtep/automake.mk include datapath-windows/automake.mk include datapath-windows/include/automake.mk +include 
windows/automake.mk diff --git a/appveyor.yml b/appveyor.yml index ebd937bf1..370de3fe8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,5 +39,5 @@ build_script: - C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/." - C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudioddk=\"Win8.1 Debug\"" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -liphlpapi\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\"" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make" diff --git a/datapath-windows/ovsext.sln b/datapath-windows/ovsext.sln index 9cb767d75..60e9318a2 100644 --- a/datapath-windows/ovsext.sln +++ b/datapath-windows/ovsext.sln @@ -1,6 +1,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.21005.1 +VisualStudioVersion = 12.0.31101.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Package", "Package", "{6BA8554E-AE50-49B0-9C98-4592447FEF8D}" EndProject @@ -12,32 +12,32 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ovsext", "ovsext\ovsext.vcx EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Win8 Debug|x64 = Win8 Debug|x64 - Win8 Release|x64 = Win8 Release|x64 - Win8.1 Debug|x64 = Win8.1 Debug|x64 - Win8.1 Release|x64 = Win8.1 Release|x64 + Win8.1Debug|x64 = Win8.1Debug|x64 + Win8.1Release|x64 = Win8.1Release|x64 + Win8Debug|x64 = Win8Debug|x64 + Win8Release|x64 = Win8Release|x64 EndGlobalSection 
GlobalSection(ProjectConfigurationPlatforms) = postSolution - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8 Debug|x64.ActiveCfg = Win8 Debug|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8 Debug|x64.Build.0 = Win8 Debug|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8 Release|x64.ActiveCfg = Win8 Release|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8 Release|x64.Build.0 = Win8 Release|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1 Debug|x64.ActiveCfg = Win8.1 Debug|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1 Debug|x64.Build.0 = Win8.1 Debug|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1 Release|x64.ActiveCfg = Win8.1 Release|x64 - {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1 Release|x64.Build.0 = Win8.1 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Debug|x64.ActiveCfg = Win8 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Debug|x64.Build.0 = Win8 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Debug|x64.Deploy.0 = Win8 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Release|x64.ActiveCfg = Win8 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Release|x64.Build.0 = Win8 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8 Release|x64.Deploy.0 = Win8 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Debug|x64.ActiveCfg = Win8.1 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Debug|x64.Build.0 = Win8.1 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Debug|x64.Deploy.0 = Win8.1 Debug|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Release|x64.ActiveCfg = Win8.1 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Release|x64.Build.0 = Win8.1 Release|x64 - {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1 Release|x64.Deploy.0 = Win8.1 Release|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1Debug|x64.ActiveCfg = Win8.1 Debug|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1Debug|x64.Build.0 = Win8.1 Debug|x64 + 
{911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1Release|x64.ActiveCfg = Win8.1 Release|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8.1Release|x64.Build.0 = Win8.1 Release|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8Debug|x64.ActiveCfg = Win8 Debug|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8Debug|x64.Build.0 = Win8 Debug|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8Release|x64.ActiveCfg = Win8 Release|x64 + {911D7389-3E61-449F-B8F3-14AD7EE9A0F2}.Win8Release|x64.Build.0 = Win8 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Debug|x64.ActiveCfg = Win8.1 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Debug|x64.Build.0 = Win8.1 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Debug|x64.Deploy.0 = Win8.1 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Release|x64.ActiveCfg = Win8.1 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Release|x64.Build.0 = Win8.1 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8.1Release|x64.Deploy.0 = Win8.1 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Debug|x64.ActiveCfg = Win8 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Debug|x64.Build.0 = Win8 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Debug|x64.Deploy.0 = Win8 Debug|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Release|x64.ActiveCfg = Win8 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Release|x64.Build.0 = Win8 Release|x64 + {63FE215D-98BE-4440-8081-C6160EFB80FA}.Win8Release|x64.Deploy.0 = Win8 Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index 8ace9ce11..2b84b44fb 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -120,16 +120,14 @@ dnl OVS_CHECK_WINDOWS dnl dnl Configure Visual Studio solution build AC_DEFUN([OVS_CHECK_VISUAL_STUDIO_DDK], [ -AC_ARG_WITH([vstudioddk], - [AS_HELP_STRING([--with-vstudioddk=version_type], - [Visual Studio DDK version type 
e.g. Win8.1 Release])], +AC_ARG_WITH([vstudiotarget], + [AS_HELP_STRING([--with-vstudiotarget=target_type], + [Target type: Debug/Release])], [ case "$withval" in - "Win8.1 Release") ;; - "Win8.1 Debug") ;; - "Win8 Release") ;; - "Win8 Debug") ;; - *) AC_MSG_ERROR([No good Visual Studio configuration found]) ;; + "Release") ;; + "Debug") ;; + *) AC_MSG_ERROR([No valid Visual Studio configuration found]) ;; esac VSTUDIO_CONFIG=$withval @@ -139,7 +137,7 @@ AC_ARG_WITH([vstudioddk], ) AC_SUBST([VSTUDIO_CONFIG]) - AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio DDK version module.]) + AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio build target.]) AM_CONDITIONAL([VSTUDIO_DDK], [test -n "$VSTUDIO_CONFIG"]) ]) diff --git a/windows/.gitignore b/windows/.gitignore new file mode 100644 index 000000000..1cc9b1a65 --- /dev/null +++ b/windows/.gitignore @@ -0,0 +1,191 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.sln.docstates + +ovs-windows-installer/Binaries.wxs +ovs-windows-installer/Symbols.wxs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +x64/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +#NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding addin-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +*.ncrunch* +_NCrunch_* +.*crunch*.local.xml + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml + +# NuGet Packages Directory +packages/* +## TODO: If the tool you use requires repositories.config uncomment the next line +#!packages/repositories.config + +# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets +# This line needs to be after the ignore of the build folder (and the packages folder if the line above has been uncommented) 
+!packages/build/ + +# Windows Azure Build Output +csx/ +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +sql/ +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file to a newer +# Visual Studio version. Backup files are not needed, because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# ========================= +# Windows detritus +# ========================= + +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ diff --git a/windows/README.rst b/windows/README.rst new file mode 100644 index 000000000..37fb98d5a --- /dev/null +++ b/windows/README.rst @@ -0,0 +1,48 @@ +Open vSwitch Windows installer +============================== + +This project generates a MSI installer for Open vSwitch on Windows, including +CLI executables, services and the Hyper-V vswitch forwarding extension. + +Requirements +------------ + +Visual Studio 2013 community, professional, premium or ultimate edition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Visual Studio Community 2013 is freely available at: +https://www.visualstudio.com/en-us/products/visual-studio-community-vs.aspx + +WiX Toolset 3.9 +^^^^^^^^^^^^^^^ + +Download and install from: +http://wixtoolset.org/releases/v3.9/stable + +Microsoft_VC120_CRT_x86.msm +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This Windows merge module is available with Visual Studio and contains the +Visual C++ 2013 x86 runtime redistributables files. +Copy the file in the *Redist* directory. 
+ +Open vSwitch installer +---------------------- + +The installer will be generated under the following path: +* windows\ovs-windows-installer\bin\Release\OpenvSwitch.msi + +Note: the kernel driver needs to be signed. + + +Build instructions +------------------ + +Build the solution in the Visual Studio IDE or via command line: + + msbuild ovs-windows-installer.sln /p:Platform=x86 /p:Configuration=Release + +Silent installation +------------------- + + msiexec /i OpenvSwitch.msi ADDLOCAL=OpenvSwitchCLI,OpenvSwitchDriver /l*v log.txt diff --git a/windows/automake.mk b/windows/automake.mk new file mode 100644 index 000000000..c8165a40c --- /dev/null +++ b/windows/automake.mk @@ -0,0 +1,57 @@ +# Copyright 2015 Cloudbase Solutions Srl +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
+ +PTHREAD_TEMP_DIR=`echo "$(PTHREAD_LDFLAGS)" | sed 's|^.\(.*\).$:\1||'` +windows_installer: all +#Userspace files needed for the installer + cp -f $(top_srcdir)/datapath-windows/misc/OVS.psm1 windows/ovs-windows-installer/Services/OVS.psm1 + cp -f $(top_srcdir)/vswitchd/vswitch.ovsschema windows/ovs-windows-installer/Services/vswitch.ovsschema + cp -f $(top_srcdir)/vswitchd/ovs-vswitchd.exe windows/ovs-windows-installer/Services/ovs-vswitchd.exe + cp -f $(top_srcdir)/ovsdb/ovsdb-server.exe windows/ovs-windows-installer/Services/ovsdb-server.exe + cp -f $(top_srcdir)/utilities/*.exe windows/ovs-windows-installer/Binaries/ + cp -f $(top_srcdir)/utilities/*.pdb windows/ovs-windows-installer/Symbols/ + cp -f $(top_srcdir)/ovsdb/ovsdb-client.exe windows/ovs-windows-installer/Binaries/ovsdb-client.exe + cp -f $(top_srcdir)/ovsdb/ovsdb-tool.exe windows/ovs-windows-installer/Binaries/ovsdb-tool.exe + cp -f $(top_srcdir)/ovsdb/ovsdb-client.pdb windows/ovs-windows-installer/Symbols/ + cp -f $(top_srcdir)/ovsdb/ovsdb-tool.pdb windows/ovs-windows-installer/Symbols/ +#Third party files needed by the installer + cp -f $(PTHREAD_TEMP_DIR)/../../dll/x86/*.dll windows/ovs-windows-installer/Binaries/ + cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC120_CRT_x86.msm" windows/ovs-windows-installer/Redist/Microsoft_VC120_CRT_x86.msm +#Forwarding extension files needed for the installer + cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8/ovsext.cat + cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8/ovsext.inf + cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/OVSExt.sys windows/ovs-windows-installer/Driver/Win8/OVSExt.sys + cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8.1/ovsext.cat + cp -f 
$(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8.1/ovsext.inf + cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.sys windows/ovs-windows-installer/Driver/Win8.1/ovsext.sys + MSBuild.exe windows/ovs-windows-installer.sln /target:Build /property:Configuration="Release" + +EXTRA_DIST += \ + windows/.gitignore \ + windows/automake.mk \ + windows/README.rst \ + windows/ovs-windows-installer.sln \ + windows/ovs-windows-installer/Actions/OVSActions.js \ + windows/ovs-windows-installer/CustomActions.wxs \ + windows/ovs-windows-installer/Dialogs/BeginningDialog.wxs \ + windows/ovs-windows-installer/Dialogs/MyEndDialog.wxs \ + windows/ovs-windows-installer/Dialogs/MyTroubleshootDialog.wxs \ + windows/ovs-windows-installer/Dialogs/UserFinishDialog.wxs \ + windows/ovs-windows-installer/License.rtf \ + windows/ovs-windows-installer/Product.wxs \ + windows/ovs-windows-installer/UI.wxs \ + windows/ovs-windows-installer/images/bannrbmp.bmp \ + windows/ovs-windows-installer/images/dlgbmp.bmp \ + windows/ovs-windows-installer/ovs-windows-installer.wixproj + diff --git a/windows/ovs-windows-installer.sln b/windows/ovs-windows-installer.sln new file mode 100644 index 000000000..298c831bb --- /dev/null +++ b/windows/ovs-windows-installer.sln @@ -0,0 +1,19 @@ +? 
+Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{930C7802-8A8C-48F9-8165-68863BCCD9DD}") = "ovs-windows-installer", "ovs-windows-installer\ovs-windows-installer.wixproj", "{259905A2-7434-4190-8A33-8FBA67171DD6}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {259905A2-7434-4190-8A33-8FBA67171DD6}.Release|x86.ActiveCfg = Release|x86 + {259905A2-7434-4190-8A33-8FBA67171DD6}.Release|x86.Build.0 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/windows/ovs-windows-installer/Actions/OVSActions.js b/windows/ovs-windows-installer/Actions/OVSActions.js new file mode 100644 index 000000000..d686e6b58 --- /dev/null +++ b/windows/ovs-windows-installer/Actions/OVSActions.js @@ -0,0 +1,258 @@ +/* +Copyright 2015 Cloudbase Solutions Srl +All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); you may + not use this file except in compliance with the License. You may obtain + a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. 
+*/ + +// http://msdn.microsoft.com/en-us/library/sfw6660x(VS.85).aspx +var Buttons = +{ + OkOnly: 0, + OkCancel: 1, + AbortRetryIgnore: 2, + YesNoCancel: 3 +}; + +var Icons = +{ + Critical: 16, + Question: 32, + Exclamation: 48, + Information: 64 +} + +var MsgKind = +{ + Error: 0x01000000, + Warning: 0x02000000, + User: 0x03000000, + Log: 0x04000000 +}; + +// http://msdn.microsoft.com/en-us/library/aa371254(VS.85).aspx +var MsiActionStatus = +{ + None: 0, + Ok: 1, // success + Cancel: 2, + Abort: 3, + Retry: 4, // aka suspend? + Ignore: 5 // skip remaining actions; this is not an error. +}; + +var ServiceStartAction = { + Stop: "Stop", + Start: "Start", + Restart: "Restart" +}; + +var ServiceStartMode = { + Boot: "Boot", + System: "System", + Auto: "Auto", + Manual: "Manual", + Disabled: "Disabled" +}; + +function throwException(num, msg) { + throw { + number: num, + message: msg + }; +} + +function decimalToHexString(number) { + if (number < 0) + number = 0xFFFFFFFF + number + 1; + return number.toString(16).toUpperCase(); +} + +function logMessage(msg) { + var record = Session.Installer.CreateRecord(0); + record.StringData(0) = "CustomActions: " + msg; + Session.Message(MsgKind.Log, record); +} + +function logMessageEx(msg, type) { + var record = Session.Installer.CreateRecord(0); + record.StringData(0) = msg; + Session.Message(type, record); +} + +function logException(exc) { + var record = Session.Installer.CreateRecord(0); + record.StringData(0) = exc.message == "" ? 
"An exception occurred: 0x" + decimalToHexString(exc.number) : exc.message; + Session.Message(MsgKind.Error + Icons.Critical + Buttons.OkOnly, record); + + // Log the full exception as well + record.StringData(0) = "CustomAction exception details: 0x" + decimalToHexString(exc.number) + " : " + exc.message; + Session.Message(MsgKind.Log, record); +} + +function runCommand(cmd, expectedReturnValue, envVars, windowStyle, waitOnReturn, workingDir) { + var shell = new ActiveXObject("WScript.Shell"); + logMessage("Running command: " + cmd); + + if (envVars) { + var env = shell.Environment("Process"); + for (var k in envVars) + env(k) = envVars[k]; + } + + if (typeof windowStyle == 'undefined') + windowStyle = 0; + + if (typeof waitOnReturn == 'undefined') + waitOnReturn = true; + + if (typeof workingDir == 'undefined') + workingDir = null; + + if (workingDir) { + shell.CurrentDirectory = workingDir; + } + + var retVal = shell.run(cmd, windowStyle, waitOnReturn); + + if (waitOnReturn && expectedReturnValue != undefined && expectedReturnValue != null && retVal != expectedReturnValue) + throwException(-1, "Command failed. Return value: " + retVal.toString()); + + logMessage("Command completed. 
Return value: " + retVal); + + return retVal; +} + +function getWmiCimV2Svc() { + return GetObject("winmgmts:\\\\.\\root\\cimv2"); +} + +function getSafeArray(jsArr) { + var dict = new ActiveXObject("Scripting.Dictionary"); + for (var i = 0; i < jsArr.length; i++) + dict.add(i, jsArr[i]); + return dict.Items(); +} + +function invokeWMIMethod(svc, methodName, inParamsValues, wmiSvc, jobOutParamName) { + logMessage("Invoking " + methodName); + + var inParams = null; + if (inParamsValues) { + for (var k in inParamsValues) { + if (!inParams) + inParams = svc.Methods_(methodName).InParameters.SpawnInstance_(); + var val = inParamsValues[k]; + if (val instanceof Array) + inParams[k] = getSafeArray(val); + else + inParams[k] = val; + } + } + + var outParams = svc.ExecMethod_(methodName, inParams); + if (outParams.ReturnValue == 4096) { + var job = wmiSvc.Get(outParams[jobOutParamName]); + waitForJob(wmiSvc, job); + } + else if (outParams.ReturnValue != 0) + throwException(-1, methodName + " failed. Return value: " + outParams.ReturnValue.toString()); + + return outParams; +} + +function sleep(interval) { + // WScript.Sleep is not supported in MSI's WSH. Here's a workaround for the moment. + + // interval is ignored + var numPings = 2; + cmd = "ping -n " + numPings + " 127.0.0.1"; + + var shell = new ActiveXObject("WScript.Shell"); + shell.run(cmd, 0, true); +} + +function getService(serviceName) { + var wmiSvc = getWmiCimV2Svc(); + return wmiSvc.ExecQuery("SELECT * FROM Win32_Service WHERE Name='" + serviceName + "'").ItemIndex(0); +} + +function changeService(serviceName, startMode, startAction) { + var svc = getService(serviceName); + + if ((startAction == ServiceStartAction.Stop || startAction == ServiceStartAction.Restart) && svc.Started) + invokeWMIMethod(svc, "StopService"); + + if (startMode && svc.StartMode != startMode) + invokeWMIMethod(svc, "ChangeStartMode", + { + "StartMode": (startMode == ServiceStartMode.Auto ? 
"Automatic" : startMode) + }); + + if (startAction == ServiceStartAction.Restart && svc.Started) { + var wmiSvc = getWmiCimV2Svc(); + do { + sleep(200); + svc = wmiSvc.Get(svc.Path_); + } while (svc.Started); + } + + if ((startAction == ServiceStartAction.Start || startAction == ServiceStartAction.Restart) && !svc.Started) + invokeWMIMethod(svc, "StartService"); +} + +function runCommandAction() { + var exceptionMsg = null; + + try { + var data = Session.Property("CustomActionData").split('|'); + var i = 0; + var cmd = data[i++]; + var expectedRetValue = data.length > i ? data[i++] : 0; + var exceptionMsg = data.length > i ? data[i++] : null; + var workingDir = data.length > i ? data[i++] : null; + + runCommand(cmd, expectedRetValue, null, 0, true, workingDir); + return MsiActionStatus.Ok; + } + catch (ex) { + if (exceptionMsg) { + logMessageEx(exceptionMsg, MsgKind.Error + Icons.Critical + Buttons.OkOnly); + // log also the original exception + logMessage(ex.message); + } + else + logException(ex); + + return MsiActionStatus.Abort; + } +} + +function changeServiceAction() { + try { + var data = Session.Property("CustomActionData").split('|'); + var serviceName = data[0]; + var startMode = data[1]; + var startAction = data[2]; + + logMessage("Changing service " + serviceName + ", startMode: " + startMode + ", startAction: " + startAction); + + changeService(serviceName, startMode, startAction); + + return MsiActionStatus.Ok; + } + catch (ex) { + logMessage(ex.message); + return MsiActionStatus.Abort; + } +} \ No newline at end of file diff --git a/windows/ovs-windows-installer/Binaries/.gitignore b/windows/ovs-windows-installer/Binaries/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Binaries/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/CustomActions.wxs b/windows/ovs-windows-installer/CustomActions.wxs new file mode 100644 index 000000000..bce945518 --- 
/dev/null +++ b/windows/ovs-windows-installer/CustomActions.wxs @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/windows/ovs-windows-installer/Dialogs/BeginningDialog.wxs b/windows/ovs-windows-installer/Dialogs/BeginningDialog.wxs new file mode 100644 index 000000000..ca1780ff7 --- /dev/null +++ b/windows/ovs-windows-installer/Dialogs/BeginningDialog.wxs @@ -0,0 +1,49 @@ + + + + + + + + + + NOT Installed OR NOT PATCH + Installed AND PATCH + + + Installed AND PATCH + NOT Installed OR NOT PATCH + + + + + Installed AND PATCH + + + 1 + + + + + NOT Installed OR PATCH + + + + + \ No newline at end of file diff --git a/windows/ovs-windows-installer/Dialogs/MyEndDialog.wxs b/windows/ovs-windows-installer/Dialogs/MyEndDialog.wxs new file mode 100644 index 000000000..064ccc5b0 --- /dev/null +++ b/windows/ovs-windows-installer/Dialogs/MyEndDialog.wxs @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/windows/ovs-windows-installer/Dialogs/MyTroubleshootDialog.wxs b/windows/ovs-windows-installer/Dialogs/MyTroubleshootDialog.wxs new file mode 100644 index 000000000..f567ba6a8 --- /dev/null +++ b/windows/ovs-windows-installer/Dialogs/MyTroubleshootDialog.wxs @@ -0,0 +1,42 @@ + + + + + + + + 1 + + + + + !(wix.WixUICostingPopupOptOut) OR CostingComplete = 1 + + + + + + + + Installed AND NOT RESUME AND NOT Preselected AND NOT PATCH + + + + + diff --git a/windows/ovs-windows-installer/Dialogs/UserFinishDialog.wxs b/windows/ovs-windows-installer/Dialogs/UserFinishDialog.wxs new file mode 100644 index 000000000..397ee04b4 --- /dev/null +++ b/windows/ovs-windows-installer/Dialogs/UserFinishDialog.wxs @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + 1 + + + + + + + + + + + + + + \ No newline at end of file diff --git a/windows/ovs-windows-installer/Driver/.gitignore b/windows/ovs-windows-installer/Driver/.gitignore new file mode 100644 index 000000000..e9994b37d --- /dev/null +++ b/windows/ovs-windows-installer/Driver/.gitignore @@ 
-0,0 +1,5 @@ +* + +!.gitignore +!Win8 +!Win8.1 diff --git a/windows/ovs-windows-installer/Driver/Win8.1/.gitignore b/windows/ovs-windows-installer/Driver/Win8.1/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Driver/Win8.1/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/Driver/Win8/.gitignore b/windows/ovs-windows-installer/Driver/Win8/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Driver/Win8/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/License.rtf b/windows/ovs-windows-installer/License.rtf new file mode 100644 index 000000000..727e7cea0 --- /dev/null +++ b/windows/ovs-windows-installer/License.rtf @@ -0,0 +1,209 @@ +{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff0\deff0\stshfdbch31505\stshfloch31506\stshfhich31506\stshfbi0\deflang1033\deflangfe1033\themelang1033\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} +{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0302020204030204}Calibri Light;} +{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;} +{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 
02020603050405020304}Times New Roman;}{\f39\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f40\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\f42\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f43\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f44\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f45\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\f46\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f47\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f49\fbidi \fswiss\fcharset238\fprq2 Arial CE;}{\f50\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;} +{\f52\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f53\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f54\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f55\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);} +{\f56\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f57\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f379\fbidi \froman\fcharset238\fprq2 Cambria Math CE;}{\f380\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;} +{\f382\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f383\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f386\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;}{\f387\fbidi \froman\fcharset163\fprq2 Cambria Math (Vietnamese);} +{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} 
+{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \fswiss\fcharset238\fprq2 Calibri Light CE;}{\fhimajor\f31529\fbidi \fswiss\fcharset204\fprq2 Calibri Light Cyr;} +{\fhimajor\f31531\fbidi \fswiss\fcharset161\fprq2 Calibri Light Greek;}{\fhimajor\f31532\fbidi \fswiss\fcharset162\fprq2 Calibri Light Tur;}{\fhimajor\f31535\fbidi \fswiss\fcharset186\fprq2 Calibri Light Baltic;} +{\fhimajor\f31536\fbidi \fswiss\fcharset163\fprq2 Calibri Light (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} 
+{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;} +{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;} +{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}} +{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0; 
+\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\*\defchp \fs22\loch\af31506\hich\af31506\dbch\af31505 }{\*\defpap \ql \li0\ri0\sa160\sl259\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\upr{\stylesheet{\ql \li0\ri0\sa160\sl259\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 +\ltrch\fcs0 \fs22\lang1033\langfe1033\loch\f31506\hich\af31506\dbch\af31505\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa160\sl259\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 \ltrch\fcs0 \fs22\lang1033\langfe1033\loch\f31506\hich\af31506\dbch\af31505\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused +Normal Table;}}{\*\ud\uc0{\stylesheet{\ql \li0\ri0\sa160\sl259\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 \ltrch\fcs0 +\fs22\lang1033\langfe1033\loch\f31506\hich\af31506\dbch\af31505\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa160\sl259\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs22\alang1025 \ltrch\fcs0 
\fs22\lang1033\langfe1033\loch\f31506\hich\af31506\dbch\af31505\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused +Normal Table;}}}}{\*\rsidtbl \rsid2365717\rsid7145912\rsid7612545}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\operator alin.cloudbase} +{\creatim\yr2015\mo4\dy2\hr16\min46}{\revtim\yr2015\mo5\dy25\hr20\min39}{\version3}{\edmins0}{\nofpages1}{\nofwords86}{\nofchars492}{\nofcharsws577}{\vern57439}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}} +\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0\ltrsect +\widowctrl\ftnbj\aenddoc\trackmoves0\trackformatting1\donotembedsysfont0\relyonvml0\donotembedlingdata1\grfdocevents0\validatexml0\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors0\horzdoc\dghspace120\dgvspace120\dghorigin1701 +\dgvorigin1984\dghshow0\dgvshow3\jcompress\viewkind1\viewscale100\rsidroot7145912 \nouicompat \fet0{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0\ltrpar \sectd \ltrsect\linex0\sectdefaultcl\sftnbj {\*\pnseclvl1 +\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5 +\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang +{\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\qj \li0\ri0\nowidctlpar\tx959\tx1918\tx2877\tx3836\tx4795\tx5754\tx6713\tx7672\tx8631\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 +\af0\afs22\alang1025 \ltrch\fcs0 
\fs22\lang1033\langfe1033\loch\af31506\hich\af31506\dbch\af31505\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\insrsid2365717 \hich\af1\dbch\af31505\loch\f1 +Licensed under the Apache License, Version 2.0 (the "License"); +\par \hich\af1\dbch\af31505\loch\f1 you may not use this file except in compliance with the License. +\par \hich\af1\dbch\af31505\loch\f1 You may obtain a copy of the License at +\par +\par }{\field{\*\fldinst {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\insrsid2365717 \hich\af1\dbch\af31505\loch\f1 HYPERLINK http://www.apache.org/licenses/LICENSE-2.0 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\insrsid7145912 {\*\datafield +00d0c9ea79f9bace118c8200aa004ba90b0200000003000000e0c9ea79f9bace118c8200aa004ba90b6e00000068007400740070003a002f002f007700770077002e006100700061006300680065002e006f00720067002f006c006900630065006e007300650073002f004c004900430045004e00530045002d0032002e00 +30000000795881f43b1d7f48af2c825dc485276300000000a5ab000069}}}{\fldrslt {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\insrsid2365717 \hich\af1\dbch\af31505\loch\f1 http://www.apache.org/licenses/LICENSE-2.0}}}\sectd \ltrsect\linex0\sectdefaultcl\sftnbj { +\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f1\fs18\insrsid2365717 +\par +\par \hich\af1\dbch\af31505\loch\f1 Un\hich\af1\dbch\af31505\loch\f1 less required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +\par \hich\af1\dbch\af31505\loch\f1 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +\par \hich\af1\dbch\af31505\loch\f1 See the License for the specific language governing\hich\af1\dbch\af31505\loch\f1 permissions and +\par \hich\af1\dbch\af31505\loch\f1 limitations under the License. 
+\par }\pard \ltrpar\qj \li0\ri0\sa200\sl276\slmult1\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 {\rtlch\fcs1 \af1\afs20 \ltrch\fcs0 \f1\fs20\lang16\langfe1033\langnp16\insrsid2365717 +\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a +9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad +5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6 +b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0 +0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6 +a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f +c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512 +0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462 +a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865 +6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b +4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b +4757e8d3f729e245eb2b260a0238fd010000ffff0300504b030414000600080000002100e67505bed10600008b1a0000160000007468656d652f7468656d652f +7468656d65312e786d6cec59cf8b1b3714be17fa3f0c7377fc6bc63f9638c11edb499b6c1262272547d9963dca6a466624efc68440498e8542695a7a68a0b71e 
+4adb4002bda4b7fe276953da14fa2ff449331e4bb6dcdd2c292c256b58c6f2f79e3ebdf7e67b9ad1f98bf722ea1ce2841316b7dcf2b992ebe078cc26249eb5dc +5bc37ea1e13a5ca07882288b71cb5d62ee5ebcf0fe7be7d19e0871841db08ff91e6ab9a110f3bd62918f6118f1736c8e63f86dca920809f89acc8a93041d81df +88162ba552ad182112bb4e8c22703bc4d12f8f9cebd3291963f7c2ca798fc20cb1e072604c9381748d330b0d3b39284b045ff28026ce21a22d17e699b0a321be +275c87222ee087965b527f6ef1c2f922dacb8ca8d861abd9f5d55f6697194c0e2a6ace6436ca27f53cdfabb573ff0a40c536ae57efd57ab5dc9f02a0f118569a +72d17dfa9d66a7eb67580d945e5a7c77ebdd6ad9c06bfeab5b9cdbbefc1878054afd7b5bf87e3f80281a78054af1fe16def3ea95c033f00a94e26b5bf87aa9dd +f5ea065e81424ae2832d74c9af5583d56a73c894d1cb5678d3f7faf54ae67c8d826ac8ab4b4e3165b1d8556b11bacb923e002490224162472ce7788ac650c501 +a2649410e72a998550787314330ec3a54aa95faac27ff9f1d4958a08dac348b396bc8009df1a927c1c3e4ec85cb4dc0fc1abab41fe7ef1fddf2f9e392f1f3e7f +f9f0a7978f1ebd7cf863eac8b0ba8ce2996ef5fadbcffe7af2b1f3e7b36f5e3ffec28ee73afeb71f3ef9f5e7cfed4058e93a04afbe7cfafbf3a7afbefaf48fef +1e5be0ed048d74f89044983bd7f091739345b030150293391e256f66310c11d12ddaf18ca318c9592cfe7b2234d0d79688220bae83cd08de4e40626cc04b8bbb +06e141982c04b178bc124606709f31da6189350a57e45c5a98878b78669f3c59e8b89b081ddae60e506ce4b7b79883b6129bcb20c406cd1b14c502cd708c8523 +7f6307185b5677871023aefb649c30cea6c2b9439c0e22d6900cc9c8a8a6b5d16512415e963682906f2336fbb79d0ea3b65577f1a18984bb02510bf921a64618 +2fa1854091cde51045540ff85524421bc9c13219ebb81e1790e919a6cce94d30e7369beb09ac574bfa1590177bdaf7e9323291892007369f5711633ab2cb0e82 +1045731b7640e250c77ec00fa044917383091b7c9f997788fc0e7940f1ce74df26d848f7f16a700b9455a7b42e10f9cb22b1e4f2126646fd0e96748ab0921a10 +7e43cf23121f2bee1bb2eeffb7b20e42faeaeb2796559d55416f27c47a475dde90f15db84df10e583221675fbbbb6811dfc070bb6c37b077d2fd4ebaddffbd74 +efba9fdfbe60af351ae45b6e15d3adbadab8473bf7ed5342e9402c29becad5d69d43679af46150daa967569c3fc7cd43b89477324c60e0660952364ec2c44744 
+848310cd617f5f76a59319cf5ccfb833671cb6fd6ad8ea5be2e922da6793f471b55c968fa6a9787024d6e3253f1f87470d91a26bf5f52358ee5eb19da947e515 +0169fb2624b4c94c12550b89fa6a5006493d9843d02c24d4cade0a8ba6854543ba5fa56a8b0550cbb3025b2707365c2dd7f7c0048ce0890a513c91794a53bdca +ae4ae6dbccf4ae601a1500fb885505ac33dd945c772e4fae2e2db51364da20a1959b49424546f5301ea209ceaa538e9e84c69be6bab94ea9414f8642cd07a5b5 +a6516ffc1b8bd3e61aec36b581c6ba52d0d8396ab9b5aa0f253346f3963b85c77eb88ce6503b5c6e79119dc1bbb3b148d21bfe34ca324fb8e8221ea60157a293 +aa4144044e1c4aa2962b979fa781c64a4314b7720504e1cc926b82ac9c3572907433c9783ac563a1a75d1b91914ebf82c2a75a61fd55999f1e2c2dd902d23d08 +2747ce882e929b084accaf9765002784c3db9f721acd0981d799b990adeb6fa33165b2abbf4f5435948e233a0f51d65174314fe14aca733aea5b1e03ed5bb666 +08a81692ac118e66b2c1ea4135ba69de35520e3bbbeef14632729a68ae7ba6a12ab26bda55cc9861d506366279ba26afb15a8518344deff0a9746f4a6e73a575 +1bfb84bc4b40c0f3f859baee091a82466d3d99414d32de9661a9d9d9a8d93b560b3c86da499a84a6fab595db8db8e53dc23a1d0c9eaaf383dd66d5c2d074b5af +549156e71efad1041bdd05f1e8c24be005155ca5120e1e12041ba281da93a4b201b7c83d91dd1a70e52c12d272ef97fcb61754fca0506af8bd8257f54a8586df +ae16dabe5f2df7fc72a9dba93c80c622c2a8eca7672e7d78154597d9c98b1adf3a7d89566fdbce8d595464ea5ca5a888abd39772c5387d49cf529ca13c5e711d +02a273bf56e937abcd4eadd0acb6fb05afdb69149a41ad53e8d6827ab7df0dfc46b3ffc0750e15d86b5703afd66b146ae5202878b592a4df6816ea5ea5d2f6ea +ed46cf6b3fc8b631b0f2543eb258407815af0bff000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f74 +68656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f24 +51eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e3198 +720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528 
+a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c0200001300000000000000000000000000 +000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b000000000000000000000000 +00300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c00000000000000000000000000190200007468 +656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d0014000600080000002100e67505bed10600008b1a000016000000000000000000 +00000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b010000270000000000 +0000000000000000db0900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000d60a00000000} +{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d +617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169 +6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363 +656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e} +{\*\latentstyles\lsdstimax371\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 
\lsdlocked0 heading 5;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 1; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 5; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 9; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 1;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 2;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 3; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 4;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 5;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 6; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 7;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 8;\lsdsemihidden1 \lsdunhideused1 \lsdpriority39 \lsdlocked0 toc 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Indent; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 header;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footer; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index heading;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of figures; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 
envelope return;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation reference; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 line number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 page number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote text; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of authorities;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 macro;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toa heading;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 5;\lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Closing; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Signature;\lsdsemihidden1 \lsdunhideused1 \lsdpriority1 \lsdlocked0 Default Paragraph Font;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 4; +\lsdsemihidden1 
\lsdunhideused1 \lsdlocked0 List Continue 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Message Header;\lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Salutation; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Date;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Note Heading; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Block Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 FollowedHyperlink;\lsdqformat1 \lsdpriority22 \lsdlocked0 Strong; +\lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Document Map;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Plain Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 E-mail Signature; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Top of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Bottom of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal (Web);\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Acronym; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Cite;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Code;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Definition; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Keyboard;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Preformatted;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Sample;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Typewriter; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Variable;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation subject;\lsdsemihidden1 \lsdunhideused1 
\lsdlocked0 No List;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 1; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Balloon Text;\lsdpriority39 \lsdlocked0 Table Grid; +\lsdsemihidden1 \lsdlocked0 Placeholder Text;\lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdpriority60 \lsdlocked0 Light Shading;\lsdpriority61 \lsdlocked0 Light List;\lsdpriority62 \lsdlocked0 Light Grid; +\lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdpriority65 \lsdlocked0 Medium List 1;\lsdpriority66 \lsdlocked0 Medium List 2;\lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdpriority68 \lsdlocked0 Medium Grid 2; +\lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdpriority70 \lsdlocked0 Dark List;\lsdpriority71 \lsdlocked0 Colorful Shading;\lsdpriority72 \lsdlocked0 Colorful List;\lsdpriority73 \lsdlocked0 Colorful Grid;\lsdpriority60 \lsdlocked0 Light Shading Accent 1; +\lsdpriority61 \lsdlocked0 Light List Accent 1;\lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 1; +\lsdsemihidden1 \lsdlocked0 Revision;\lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 1; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 1; +\lsdpriority72 \lsdlocked0 Colorful List Accent 1;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdpriority60 \lsdlocked0 Light Shading Accent 2;\lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdpriority62 \lsdlocked0 Light 
Grid Accent 2; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 2; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 2; +\lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;\lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdpriority61 \lsdlocked0 Light List Accent 3;\lsdpriority62 \lsdlocked0 Light Grid Accent 3; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 3; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 3; +\lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdpriority60 \lsdlocked0 Light Shading Accent 4;\lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdpriority62 \lsdlocked0 Light Grid Accent 4; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 4; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 4; +\lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdpriority60 
\lsdlocked0 Light Shading Accent 5;\lsdpriority61 \lsdlocked0 Light List Accent 5;\lsdpriority62 \lsdlocked0 Light Grid Accent 5; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 5; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 5; +\lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdpriority62 \lsdlocked0 Light Grid Accent 6; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 6; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 6; +\lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis; +\lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdsemihidden1 \lsdunhideused1 \lsdpriority37 \lsdlocked0 Bibliography; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;\lsdpriority41 \lsdlocked0 Plain Table 1;\lsdpriority42 \lsdlocked0 Plain Table 2;\lsdpriority43 \lsdlocked0 Plain Table 3;\lsdpriority44 \lsdlocked0 Plain Table 4; 
+\lsdpriority45 \lsdlocked0 Plain Table 5;\lsdpriority40 \lsdlocked0 Grid Table Light;\lsdpriority46 \lsdlocked0 Grid Table 1 Light;\lsdpriority47 \lsdlocked0 Grid Table 2;\lsdpriority48 \lsdlocked0 Grid Table 3;\lsdpriority49 \lsdlocked0 Grid Table 4; +\lsdpriority50 \lsdlocked0 Grid Table 5 Dark;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 1; +\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 1;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 1;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 1; +\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 1;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 2;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 2; +\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 2;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 2; +\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 3;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 3;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 3;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 3; +\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 3;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 4; +\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 4;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 4;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 4;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 4; +\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 4;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 Grid Table 2 
Accent 5; +\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 5;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 5;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 5; +\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 5;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 6;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 6; +\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 6;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 6; +\lsdpriority46 \lsdlocked0 List Table 1 Light;\lsdpriority47 \lsdlocked0 List Table 2;\lsdpriority48 \lsdlocked0 List Table 3;\lsdpriority49 \lsdlocked0 List Table 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark; +\lsdpriority51 \lsdlocked0 List Table 6 Colorful;\lsdpriority52 \lsdlocked0 List Table 7 Colorful;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 List Table 2 Accent 1;\lsdpriority48 \lsdlocked0 List Table 3 Accent 1; +\lsdpriority49 \lsdlocked0 List Table 4 Accent 1;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 1;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 1; +\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 List Table 2 Accent 2;\lsdpriority48 \lsdlocked0 List Table 3 Accent 2;\lsdpriority49 \lsdlocked0 List Table 4 Accent 2; +\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 2;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 3; +\lsdpriority47 \lsdlocked0 List Table 2 Accent 3;\lsdpriority48 \lsdlocked0 List Table 3 Accent 3;\lsdpriority49 \lsdlocked0 List Table 4 Accent 3;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 3; +\lsdpriority51 
\lsdlocked0 List Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 4;\lsdpriority47 \lsdlocked0 List Table 2 Accent 4; +\lsdpriority48 \lsdlocked0 List Table 3 Accent 4;\lsdpriority49 \lsdlocked0 List Table 4 Accent 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 4;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 4; +\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 List Table 2 Accent 5;\lsdpriority48 \lsdlocked0 List Table 3 Accent 5; +\lsdpriority49 \lsdlocked0 List Table 4 Accent 5;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 5;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 5; +\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 List Table 2 Accent 6;\lsdpriority48 \lsdlocked0 List Table 3 Accent 6;\lsdpriority49 \lsdlocked0 List Table 4 Accent 6; +\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 6;}}{\*\datastore 010500000200000018000000 +4d73786d6c322e534158584d4c5265616465722e362e3000000000000000000000060000 +d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff090006000000000000000000000001000000010000000000000000100000feffffff00000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +fffffffffffffffffdfffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffffffffffff0c6ad98892f1d411a65f0040963251e500000000000000000000000000ca 
+63ca1197d001feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000105000000000000}} \ No newline at end of file diff --git a/windows/ovs-windows-installer/Product.wxs b/windows/ovs-windows-installer/Product.wxs new file mode 100644 index 000000000..c1c4d4bb6 --- /dev/null +++ b/windows/ovs-windows-installer/Product.wxs @@ -0,0 +1,249 @@ + + + + + + + = 602)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "ALL" AND (&OpenvSwitchDriver = 3)]]> + "ALL" AND (&OpenvSwitchDriver = 3)]]> + + + + + "ALL" AND (&OpenvSwitchDriver = 3)]]> + "ALL" AND (&OpenvSwitchDriver = 3)]]> + + + "ALL" AND (&OpenvSwitchDriver = 3)]]> + "ALL" AND (&OpenvSwitchDriver = 3)]]> + + "ALL" AND (&OpenvSwitchDriver = 3)]]> + "ALL" AND (&OpenvSwitchDriver = 3)]]> + + + "ALL" AND (&OpenvSwitchDriver = 3)]]> + "ALL" AND (&OpenvSwitchDriver = 3)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/windows/ovs-windows-installer/Redist/.gitignore b/windows/ovs-windows-installer/Redist/.gitignore new file mode 100644 index 
000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Redist/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/Services/.gitignore b/windows/ovs-windows-installer/Services/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Services/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/Symbols/.gitignore b/windows/ovs-windows-installer/Symbols/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Symbols/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/UI.wxs b/windows/ovs-windows-installer/UI.wxs new file mode 100644 index 000000000..5df4f4f59 --- /dev/null +++ b/windows/ovs-windows-installer/UI.wxs @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + 1 + + NOT Installed + Installed AND PATCH + + 1 + LicenseAccepted = "1" + + NOT Installed OR WixUI_InstallMode = "Change" + Installed AND NOT PATCH + Installed AND PATCH + + 1 + + + 1 + 1 + 1 + + 1 + 1 + 1 + 1 + + Installed + NOT Installed + + 1 + 1 + + + + + + diff --git a/windows/ovs-windows-installer/images/bannrbmp.bmp b/windows/ovs-windows-installer/images/bannrbmp.bmp new file mode 100644 index 0000000000000000000000000000000000000000..0d0baf328b338b7baa533d1a5e8c5655f924830d GIT binary patch literal 134958 zcmeI5`)^gn8iu)u7Kz+cL{mkN$W1&EL8%3l%K@T-kRpgd6A>^#QvneOMvY1cpaSIt zup(lKAXK0fLCPg6v|wlv@JLZE1zIlWZ#X}mS64EbthHu!?cHrHyH5jk*6f*YW*!pX z{muI3o1vrs*{(&iKYpIj-yZrqU60S`zs;L<*6(J`{@1oevyN?={rUIL3jO*oKM;Tb z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5J)M3D_5?h)LvX;wY9aC zm6dT@C(`ckT2Uf%q%8y>009Wx00e&h_1AUl)=il*Wze8OnVFew+USzWKYm)ZYSp1b zhesZHWXzZ`@4owPX=&-zt5^T(^?dTlCle-2sQ)O}uV3H2d-u$lGyC=H*S>vwZ$&Lz 
zw!HV=dxs7kx^Usbva+)JkAejVKmY;|XgUNgT)41w>C*oF`+J%a=lAWm-#&TrWC`eB zL`g2;{rBH5b^Gb3pOPLzin(UZn(p1Z*SVs{9($~?urTR?=m-G_KmYIj)r}S9&A9_~MJn8Lz0QNa&k@hLe|l;>3yMhNBk*AOHaf zq&ETiV$!BN?zqDxhzZq?8Z|0~igWGSwXR*edK1tV@$uuw6aBX+E-p4(vI#AuUAuNA zB_)XtM|%iB00Iz5Zvwyl_S=XNBN7snIvdMrUB7;PO1ORc>8JK^ue|a~LS}dF+?kP) zVOvP3tiIc~Z%=4M8bbgA5P(1$6F7YM@ICk3laQ3eZ=9ExCr`FM^wj;KhaR#=OvvR_ z2ySworahv*1JvieYJHYZUkE?|0uX3a1orINQ-4{pB_>$b8_D`a+iuvfA!zfp*ItW* zQJR~Tl@;usL`~nmee3xJAkKto0|5v?00NDfz>h!v7+*9M%c_W01)4tjt zNTIgLagbC!{P4p;nY-ndTTY!iRj19>t5*m6`f9p#>7oNE-MV$t0(~F(9`p0_>l~2= z5P$##Ads#Es;a8uY|q|x*Ihd5sJNP%ni?Mp#pfuL=;^1QZr;4PPhx_P@4fe)uX~)w zjT<)xO?fp_r%sKwU`G=LjYxTOb93zwBfaFEKYxD2o6rXzd=Qs2nmM0B00Iz5R|5L~ z|BxX=f@)&d++(@jYPrpmnyn(}HSe3Jf%e;@_(xccg} z>E6A2L;LRAw=cp6-?eL3XfG-u009UC>kp zB93Ownia=Rc6PQsF;`h(y~+(3G^kKvR~LO0Z11bl+fP?_UrQb#009U1h7gb^l0?hT?BsqsV2eGuBA=mAJYG>qGZ`&!0bEP*5;^`gHBAwr<_p z?xjSvbuQ}2kt22&yIf~q8bAO75P(2B5zysdwhEZClJJ<6IM=RUU9xxP6U&=DSCE3?{QUFJp>m)~lk~N^_T0>^!zG&a)J@5gE0e%WFTE52Ibx60HTk#S zemlZo5+7-Xp=0Te&LphS1p*L&00eGO0t%E3l>=2CI&?^jI(7coxb@vU(bVs(6)oj* zM*rD+a*K<`v(G*oI-K$jxw@!+*!9_`-xuL_UQYThIdmT7%z}02h zvSsDv<*wXBsd72DY}s<#ZMV7R@c#SnhfbSH2tWV=5J*b`u49RcpZU*)%0c<9V``<{ zEJ8Pxxz2SMM1e!0gh!7a9jaN1Cc1R#lDfEjSkoZXY@A9Zyqz>@Qt0SjrJT?dUMd=A zO0Qfs#K_O&}N((xv@$vq{MwL%U=E1v18*{qYVTg009W34FTVY z3?maJS?ezsDkgl<%gE*6f45z+e{S@j%ClwK;C*Io=klt3K~SzNxS<=K&p-dXnUY=5 zfz#iA|J@jO9qsJYsgviEnwHT8C4T?%%P*gL>M46HS6Oq26Nno#y5pH*MPF zTFBzXi|v-KvgB1Q@4N3lmz`j}t|W6=qaFeffB*#2l|WDqyc%sn z1R#*!1azfls5*T9`RAbvulh6ECsEy$wocs8`*igQ+0Gc&Y{T+YxuemWH6`q~g`N0stwL%&f3=R*wyAOHafq$PoY0|$m`#O&F#Bi5UQ zZb~}m?Z~-!@uGHFLr47f+i%T?+J==|7urnjs%hpc=mlkH7ghR1SsnFu(sd#x)X1H8 z-udB&A4YHa`sTwU1Rwwb2&5l@$&)9C%0yOHR>WeH(9K1ArYXs-atMSDr%F3M)-@|N z(`{m|i7B>6X@WxS>+p;1aw=NbrOUzY>Z|iCDa=0Pnm_drfB*y_(D(`H>dBx|s7Ah+ zPW`*)zwgvHn~toE__MS$*Sz=4UfD_)sd%Pb>4~~`?{3D@xUPefk3RaSX%<>|>#es! 
zm!nGKa!Np57g*bUC9Dcz+PZbCeIrchpj1Kt0uX>e(;=XnF^$Tu?l|_F9eUURhb`3SQL<#0J|7)oIO)j0}5?rmQ2Jx}4ffm;wYK009Unju%7MclDfB*y_ z009U@5YX#9pNN=8y^#z(FBxIY_mO-_SLPp$l4uH9pYcQ=4<0;dmSz_e1teEA+8QCX z(R}z60uX>e(;*<2^MMB*uyw>PckI}4_wL=v(U7M(e*Adbnyxf`t;Fx#yDiEs;ctx@@~HQW}gN7QSrLI^z3mg*T0SKv$m&U$_l1EaNt0w{f!$p z`jX`;TVPBSp$4gh00bZafixf>AL{YPA2&MUD(IZZ!i5W!rZsq8L*AT@SdJMp#uYki zY$~t9=FOY`7HARs#tOAn<^-K-H51b*&NttD)0G==)TmMC&Yd%jDL?=M5P(2aBcKDD zJ$m#o>K0v)n&_>l?i0~=s}i2KZ{I$;z5S`l-qY}em8s93NwBO$5OOEAD1AflxtYGk zT6Lm(*A2g3MuY$aAOL~Clz`IQx$d&`sYY^-@-!Rr;7KSeW#3J1@^&xDYe`8-sC_CS z009UkKz;b|VOREp_$yUDzy_EqLqGrm5P-l9K|t>= z-+1EedJrfsE>6jw zt&Xwe=H}}1%|?Z#prAk#^9f>{M~PUELIu3ftt`Er;Z1Rwwb2sA1J+MiSsfEhDp3>!8~ zT4V0hl%6UbpU$Mb@WKm=7cc(dhaVa^5l}rTX|~ctPKKs04eV(0t z_St*PgsnE(XwU~gd*#pY{Q2MY1`XOLf7Tnc)d$&w-q~=YK|?kf^x>ZmGxF;{eh3gC zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7e?M_OR~`0=~$y6Z>!7Tp{H0t7y? z0%G___OI5B5g_niARvbS7yKo=MFIpqvI1iGNA|DQjS(QQ?h7njxbVk6{_*a+?>=nU zu#9^jfBf-({p(+=w^hXOIdkTW9z8l`MvfeL=bd+!1s;F=@k0(dWV`LQ%eiNqamE{O zypcn*X3aY0m}8!M?zy7aV~;(SkMrlxFVAX8{O)(ZTe)&&HS>{29?5mL-FDkucG)G> zzWL^x)uT7vbkp7JAC-? 
z(@r}r`M2D1OSQmVciokbOP4M!Pmv%%fWW{Mc>n$P&pGFu_19njV;}q2-h1zT^wCEj zc;JB>eYoG)q)C(Be*5jB*@>k?h78$!^Uc5g?Qj3|r$7Dt=RcqFH{X17jwUz{9z1xf zt+qP#)KiZ=_Sg+K+;HcecYf`)*RHwdnruThckbMxhQ!Hyy#N0DYaJ3WKkiG z<;3)j ze=ko@5YD;9#~=OZN9BfSX;n%lfS+)}35Onf=oVXSkunh|3*_TP7hRN(&p!KXS(^j_ z0t5!8K>D|dR|gz$z@kNq$`-!z$}8Xc*0++M_?aM|{KW8lyzREziZTh&+i$;pB6&WJ z8Z|1h{pFWmE)Ly&_uYH#wO2a!J&NH)Gq%}gn|I!Mr>JMAop$=nXFgMQ<1fAR(lp@0 z3ok5={^1XQ$j3zKWy_W&=di;L%g1V8x;BQt`s%Csrr3Aiear8rhaY}8r;Fi*s^0_% ztXl%SqhES?U&pr3t9L;D(I^h#1PE1s<{;XKB zqWc)06BCUyvXKgNR<4rKn0yhOefHVq72kdL-HkWiIAb8mDLUa9V9BKu+tXprza#e8 zV~_kUyW+JmJi{-!M80LpYKw#z-U8wy0RroUz;VYNml*!cGtbnvHsjA3YDjF%(ZujC zed$ZpVi~?pJbdb@r}De~=ZY(?=st!QA@s>7pZv;KzLLIu?H5Xh%0KzZPab^m!D&M= zEOXtE?(mtFDN7(Q*KQFJAK_E zk$792GJX2=#Lt#Tr8$(y`KeERYV6ptN#Ah84JlJ}#mfsNk*g#o=g=jWT#~V!e48Y6 zr#Jq@6Hg>FBlExb#V=Cq#1l^}yW+JmJYN$jf99EImX#MzQHvNpZ{EBj|Ge|gOR)?n zmBl0o5FjuB1&Swz9U7L=j6|~xm?VZLRPM9SJ_&;_z4X%L$&-_xZg9>m(g}nA{`bHC z_P4*~fo7g>CN>r^e9t}iOg}$0NW6t(zrS-g00Vt7XKbEu5r8Bxy&Ois^<8EMJ< znIh(A#6FKrGtDN2ix{3k^|Ui(G8A7t>ntweHvs|!2ByI0KKHo{J9lW?cfb4HM8C`i zNes`t!IMrpDIbfUgAO{Vc%GQJdel)z{eSkLp+kp0^w2}8vxwmnCQK*~W%^>vwTj`I z5hF&F`5A;s`8={r&eW+>H`!#9e9Rb2a?%4YI^m!G^ry=SDw!LT`7`C|#lH>`x62q_ zG^1!k21zo~e#8++lui2VXFr>Bix^%!#?7I8*JPxqtW$yj0RjV4Ah9>&Ax#sZGcKLy zn8jdA`p+-E_+suN-P9Q~W@Hef<&Kh{Su2@3SWfv!e9q`fB76?rdh4y}qPLuqj~S&& znM@Zc59OlCNt0&IoSAT%t7n#Yc{Kg=9IbZd^Q<%Vmj#N8=67l+=CmYT1m~Ri`s=S3 zC3EOcfBMtC_ugB44HYpwhtkM2wOV0WvjhPG1O}QwUc*V8EQUnd+g3cgXf)Z zL_fSsRmBJp7?=VX8P2SaOu$au%QTNn_PGA~>$~stuA!UCsC(YB&s3AlB*`@O;+@Ja zua#GmvKC71RP**4#PD_#IRpr-ivs0*kAc(p#Nqs7VU4y&3}2(XDkngIz#0|E$ayAD zuF*@VoB#m=1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+0D*oG=yIxFpSL9QQZik<&#O&^ zYwM2I#J#AK009D}K-MJ5O0ZeYHCy~-)s$lGsmvbF`YKt|@5^8Qa#oJ|)vtb)wYall zNqN>9N@R1)tYNkKm1o9#7Qf97Zdpw+dk^Jy@{{@JS^I0um@(&{e}0GUK3D(hY9K&> zz?v4w3Q^hAG8=0a@vWQRSw%3fQnqib&{s`OS2+vPmJ5UTuG&QJy!DfHaaVaqU;XM= zv#3|^uHq~L1PH7xfxL}=)>&s|Me$WexuybHTqP@V_j&uv#bb+w!Fy4ERxIq%w^D8} z|N6*U;=Q<*QwR_s(9Z(tqb5#f1LCIO+@t)%4?nyQqhD6vDpo#88#0VjEGOF|pX@GC 
z4x9AoMzXt5)^+UBwG<{mfIvSBWb>V@l+vT%w#vfk%xCxEKDm*+L0P?AR^{qZwGTe{ zVD?R}*3hGL&Mg;C?9p`UojTs7Rar!#`RITiRLds`K@)e?8j=w}iX)kH7x)uUk&<=3{o}Y_F=w z$-X?DYp6a6ZLcKX0$EzT`gk-C1+r~WhmrFyeBlea;+@SMB|xBm1QLrzj~?9~*{V5P zZ@qP9s&w-XW5$Y)Jo3oux$Wuf>DG%09_jYCS5@RB2siEHTo~p-WYf8s&0gJPxoSRV z&YTYaHkmzpcD1lH0RjZpia=iBX^L+}e&&AUO`jgMEz`oYS6YW^Gip)YuXB}^-*Lws z9cs&=bfh~}a>5BGG@U+k=FAQ!cIfpi%Qno5{*H_twjQN-4tBEvemFFrzfB=CtDvRDwv9sb-)wPcz8;~)Q67Hdjm zZI-fpHIa9qnof~VfB=Dh5J<;0fwUTzs_86XRc*s_uYIs1d{2LtELqawX=%2n@20}+ z$Xsn$Q##XQYIm#8l$)kga%hze$Y^=f>8m_f0RjXFtT};;F1n~GI*l4NsygrD_GhL} zykUplwIuUcny!+r>?)s=W?jLi)7$fF$7kMnwjs z@#jDPxpTGoHfcGf!^ftsczKDuVf4fkPh54?Re4am+its6=M$D?S`zo%b5HFR^RS?$ zq>ls$5a>67fBy5IwV^4QR0)c=3%zN8Pkj5y10%pR3nTSG@Co zkGybKQ`-(b^w9PTB`5EDHl33E@#DugoptQ7$JQRoh7>)W3E80mStG0I+h^RkaUD){ zhyVcsYgFJn-}z2$4BU0sU31;XU%G$nv@Oa2-euy!#kVxIIa6F}FPF^BLaaUO@y8!; zI%Ud~DGL@XXnH5Kue#!Q-F4Tdn{L|Fh6^veu(sJU2@oLA4+2fop7ZSQZD z<94-JhZvr>*PABz<)LTo{Z5!Lp*DWzN$6W|y_Hj%`u1gk+G@MW%mOUOAAfx9)r-t@ z&iimt^lqA@3;_ZJssdU3q&7xffBp3-Hut5O2TdDNjnCbr6T>qC^UCrUs~0NM88)fC z;?-AQU6v^m8P93zW>20xxhVGV!w=V1n9NMMDXZ#HBHjDE)R~v}n`$odCr+H$qiPi< zK!8Bs3bf3!D??Vfc*sj{K6}i~yOyDQ)cwq??5ES7)63iGK87E2%rUji%Yuh39pJo(lm*Od3+MgN zl`B`4S8q>buJ{#KT+uY9R9j~<`|5#ddwb*%AV8pdfu^yPM<0E(y^>ka&6sraF3CUr zp7G`AN;io!?>+48cb69xju|tiHZZn~r7T*ssHqcv)KN#Zx9z&?uB)v!nOTyty?l`~ zfByUqYq```m{*@JyX>;23gr_ZK%hSbvLtwIq`L9O8*7`f^us5WJ&oas^T*z@+mdC^ zr_St~P5 zdlADge(0nux{~*0YA=_}>O7OCR;5^0plxbco=fKS%-Z@hp|RQ(&#arK?}+x(b8hA+ z_TgKfP3@OYfB=D>3S{Z*_NbMd^iex(F+AtRdv9NOO^5$3fAq9+oP57)Hpqg9MZ=n2 z3*C9=oy(Tzx=j?d>G9?20FtH*E${JvTujkOn* zNq_)>wIz_1y_;U5IrrRid)=yHZehas`0IwwTk=p*PnPv=dTITnlTIqHm934dv9y|A zOA_#YGKz5X9Kex!qNKVe}u#9aMt<5~c z>ZnWSYdK1Q0D-kGFm>wGYB;T?PdxF&-Zd{VeDVX6R=&5QJbm%v#Z8kyGT_{A;}w|= zK6vop>OGd}OcBpU4`qWT2oNB!P6(`6u_EuJm!YgBap#?P_Tny9y!*jwROL^mbL_e2 zo-H+(ANzB(-W>J9lnN=q^6y31+(JwWpMsrlyn>5(EegD1mgvx8HvI+TfPVAwz~_JImfRI6d%5 zlO{FQocwFAy|#BX_wKBGA2l^**|KFF>dcMgsc2@Zyzs&c9f~c* 
zf9YkP79Sr85LoL1+4rF-z7_c+MvTaO>2BJ&bm`L5Pd|Ob4L9siVdlNGEM3)2&8uG~ zdqdQ=Dw9&OXHaeZx7~JI%i=9rF`!F|+6rY7AV8qc1=6?t+Sk5T8{ev#nfrJC`R8ZZ zkoGxv>E&h$_Yp@N(KH36tS9df-EqeqeSQnwRM=r`CsS7PmQd!*m-S?p_|Ja!v!?X{ z4nO?x*I$3VEG$8Q0D-kDkO|?#hYv3!ZRbQHTSn2d?q7yPvKZ9nn{VE^eD!Gg)@1+R zZ{F6gwxm6sm!8ti&Q0aps-^qgQaE?r)A6PLzh74*K;XYhAg{VNt;O9E!Fu;GJ@EdV zOj!GOm(^f;*ZAsL3H7sP&8n?mCIJEj)}laq<4wDpRfAYB(itIN<@@seaN|!u{dCit zKHc9`)&%H>rn&?H0t9*?kg@JF&N!oKDV6R6UA5$nJMOreQ_XvEBTc7dv9BJjJzKqz zq_aSQsVPmi@(By=~% zlz_$%An=hFNQZpNlqp%tubbf3UM9~3k390o1q&7oOsK~mdn^ww+ne2#lYf|G2t^Nk zVE%1t6#)WG0*T}q$3FPrgS+pzChVVm_Sw%q`)pI410+AUmJK(v?p9OWF7vbC;q2M7 z2S~Fug8%^ny%5M4LSA?|<&;y#jvbr0SFQ)1*q&GNGtcnoqmRy>Xiq-*WIw-v*^7Ir zo|4ukKIgrp{rBI$+~YnsmGtP*qt83 F{|D`Sps@e| literal 0 HcmV?d00001 diff --git a/windows/ovs-windows-installer/ovs-windows-installer.wixproj b/windows/ovs-windows-installer/ovs-windows-installer.wixproj new file mode 100644 index 000000000..af22f4274 --- /dev/null +++ b/windows/ovs-windows-installer/ovs-windows-installer.wixproj @@ -0,0 +1,79 @@ + + + + Debug + x86 + 3.8 + 259905a2-7434-4190-8a33-8fba67171dd6 + 2.0 + OpenvSwitch + Package + $(MSBuildExtensionsPath32)\Microsoft\WiX\v3.x\Wix.targets + $(MSBuildExtensionsPath)\Microsoft\WiX\v3.x\Wix.targets + + + bin\$(Configuration)\ + obj\$(Configuration)\ + Debug + + + bin\$(Configuration)\ + obj\$(Configuration)\ + BinariesPath=Binaries;SymbolsPath=Symbols; + False + False + 1076; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(WixExtDir)\WixUtilExtension.dll + WixUtilExtension + + + $(WixExtDir)\WixUIExtension.dll + WixUIExtension + + + + + + + + + + + \ No newline at end of file From 50dcbd8ed473210e6d2aa44f28843fb417416397 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 15 May 2015 17:03:17 -0700 Subject: [PATCH 116/146] ofp-util: Convert flow_metadata to match structure. We have a special flow_metadata structure to represent the parts of a packet that aren't carried in the payload itself. 
This is used in the case where we need to send the packet as a Packet In to an OpenFlow controller. This is a subset of the more general struct flow. In practice, almost all operations we do on this structure involve converting it to or from a match or have code that is the same as a match. Serialization to NXM and back is done as a match. There is special flow_metadata formatting code that is almost identical to match formatting. The uses for struct flow_metadata aren't performance critical when it comes to memory, so we can save quite a bit of code by just using a match structure directly instead. In addition, as metadata increases and becomes more complex (Geneve options require some special handling beyond just additional fields), using the match structure means we only have to do this work in one place. Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- lib/flow.c | 48 +++++++++++----- lib/flow.h | 18 +----- lib/learning-switch.c | 8 +-- lib/ofp-print.c | 39 +------------ lib/ofp-util.c | 100 +++++++--------------------------- lib/ofp-util.h | 2 +- ofproto/connmgr.c | 4 +- ofproto/fail-open.c | 3 +- ofproto/ofproto-dpif-upcall.c | 2 +- ofproto/ofproto-dpif-xlate.c | 2 +- tests/ofp-print.at | 4 +- tests/ofproto-dpif.at | 48 ++++++++-------- tests/ofproto.at | 8 +-- 13 files changed, 100 insertions(+), 186 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index 0f9ee504c..7dd0dfd80 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -758,23 +758,45 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc) } } -/* Initializes 'fmd' with the metadata found in 'flow'. */ +/* Initializes 'flow_metadata' with the metadata found in 'flow'. 
*/ void -flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) +flow_get_metadata(const struct flow *flow, struct match *flow_metadata) { + int i; + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 31); - fmd->dp_hash = flow->dp_hash; - fmd->recirc_id = flow->recirc_id; - fmd->tun_id = flow->tunnel.tun_id; - fmd->tun_src = flow->tunnel.ip_src; - fmd->tun_dst = flow->tunnel.ip_dst; - fmd->gbp_id = flow->tunnel.gbp_id; - fmd->gbp_flags = flow->tunnel.gbp_flags; - fmd->metadata = flow->metadata; - memcpy(fmd->regs, flow->regs, sizeof fmd->regs); - fmd->pkt_mark = flow->pkt_mark; - fmd->in_port = flow->in_port.ofp_port; + match_init_catchall(flow_metadata); + if (flow->tunnel.tun_id != htonll(0)) { + match_set_tun_id(flow_metadata, flow->tunnel.tun_id); + } + if (flow->tunnel.ip_src != htonl(0)) { + match_set_tun_src(flow_metadata, flow->tunnel.ip_src); + } + if (flow->tunnel.ip_dst != htonl(0)) { + match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst); + } + if (flow->tunnel.gbp_id != htons(0)) { + match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id); + } + if (flow->tunnel.gbp_flags) { + match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags); + } + if (flow->metadata != htonll(0)) { + match_set_metadata(flow_metadata, flow->metadata); + } + + for (i = 0; i < FLOW_N_REGS; i++) { + if (flow->regs[i]) { + match_set_reg(flow_metadata, i, flow->regs[i]); + } + } + + if (flow->pkt_mark != 0) { + match_set_pkt_mark(flow_metadata, flow->pkt_mark); + } + + match_set_in_port(flow_metadata, flow->in_port.ofp_port); } char * diff --git a/lib/flow.h b/lib/flow.h index dcb5bb030..70554e414 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -34,6 +34,7 @@ struct flow_wildcards; struct minimask; struct dp_packet; struct pkt_metadata; +struct match; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. 
This will cause build assertion @@ -179,26 +180,11 @@ BUILD_ASSERT_DECL(FLOW_SEGMENT_3_ENDS_AT < sizeof(struct flow)); extern const uint8_t flow_segment_u64s[]; -/* Represents the metadata fields of struct flow. */ -struct flow_metadata { - uint32_t dp_hash; /* Datapath computed hash field. */ - uint32_t recirc_id; /* Recirculation ID. */ - ovs_be64 tun_id; /* Encapsulating tunnel ID. */ - ovs_be32 tun_src; /* Tunnel outer IPv4 src addr */ - ovs_be32 tun_dst; /* Tunnel outer IPv4 dst addr */ - ovs_be16 gbp_id; /* Group policy ID */ - uint8_t gbp_flags; /* Group policy flags */ - ovs_be64 metadata; /* OpenFlow 1.1+ metadata field. */ - uint32_t regs[FLOW_N_REGS]; /* Registers. */ - uint32_t pkt_mark; /* Packet mark. */ - ofp_port_t in_port; /* OpenFlow port or zero. */ -}; - void flow_extract(struct dp_packet *, struct flow *); void flow_zero_wildcards(struct flow *, const struct flow_wildcards *); void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *); -void flow_get_metadata(const struct flow *, struct flow_metadata *); +void flow_get_metadata(const struct flow *, struct match *flow_metadata); char *flow_to_string(const struct flow *); void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t), diff --git a/lib/learning-switch.c b/lib/learning-switch.c index bad354883..3c8536dcc 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -625,14 +625,14 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) /* Extract flow data from 'opi' into 'flow'. */ dp_packet_use_const(&pkt, pi.packet, pi.packet_len); flow_extract(&pkt, &flow); - flow.in_port.ofp_port = pi.fmd.in_port; - flow.tunnel.tun_id = pi.fmd.tun_id; + flow.in_port.ofp_port = pi.flow_metadata.flow.in_port.ofp_port; + flow.tunnel.tun_id = pi.flow_metadata.flow.tunnel.tun_id; /* Choose output port. */ out_port = lswitch_choose_destination(sw, &flow); /* Make actions. 
*/ - queue_id = get_queue_id(sw, pi.fmd.in_port); + queue_id = get_queue_id(sw, pi.flow_metadata.flow.in_port.ofp_port); ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); if (out_port == OFPP_NONE) { /* No actions. */ @@ -655,7 +655,7 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) po.packet = NULL; po.packet_len = 0; } - po.in_port = pi.fmd.in_port; + po.in_port = pi.flow_metadata.flow.in_port.ofp_port; po.ofpacts = ofpacts.data; po.ofpacts_len = ofpacts.size; diff --git a/lib/ofp-print.c b/lib/ofp-print.c index d773dca4f..96e65a7e7 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -104,7 +104,6 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, char reasonbuf[OFPUTIL_PACKET_IN_REASON_BUFSIZE]; struct ofputil_packet_in pin; int error; - int i; error = ofputil_decode_packet_in(&pin, oh); if (error) { @@ -120,43 +119,9 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_format(string, " cookie=0x%"PRIx64, ntohll(pin.cookie)); } - ds_put_format(string, " total_len=%"PRIuSIZE" in_port=", pin.total_len); - ofputil_format_port(pin.fmd.in_port, string); + ds_put_format(string, " total_len=%"PRIuSIZE" ", pin.total_len); - if (pin.fmd.tun_id != htonll(0)) { - ds_put_format(string, " tun_id=0x%"PRIx64, ntohll(pin.fmd.tun_id)); - } - - if (pin.fmd.tun_src != htonl(0)) { - ds_put_format(string, " tun_src="IP_FMT, IP_ARGS(pin.fmd.tun_src)); - } - - if (pin.fmd.tun_dst != htonl(0)) { - ds_put_format(string, " tun_dst="IP_FMT, IP_ARGS(pin.fmd.tun_dst)); - } - - if (pin.fmd.gbp_id != htons(0)) { - ds_put_format(string, " gbp_id=%"PRIu16, - ntohs(pin.fmd.gbp_id)); - } - - if (pin.fmd.gbp_flags) { - ds_put_format(string, " gbp_flags=0x%02"PRIx8, pin.fmd.gbp_flags); - } - - if (pin.fmd.metadata != htonll(0)) { - ds_put_format(string, " metadata=0x%"PRIx64, ntohll(pin.fmd.metadata)); - } - - for (i = 0; i < FLOW_N_REGS; i++) { - if (pin.fmd.regs[i]) { - ds_put_format(string, " reg%d=0x%"PRIx32, i, 
pin.fmd.regs[i]); - } - } - - if (pin.fmd.pkt_mark != 0) { - ds_put_format(string, " pkt_mark=0x%"PRIx32, pin.fmd.pkt_mark); - } + match_format(&pin.flow_metadata, string, OFP_DEFAULT_PRIORITY); ds_put_format(string, " (via %s)", ofputil_packet_in_reason_to_string(pin.reason, reasonbuf, diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 0f9a38d85..9004b8d30 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -3303,24 +3303,6 @@ ofputil_encode_flow_removed(const struct ofputil_flow_removed *fr, return msg; } -static void -ofputil_decode_packet_in_finish(struct ofputil_packet_in *pin, - struct match *match, struct ofpbuf *b) -{ - pin->packet = b->data; - pin->packet_len = b->size; - - pin->fmd.in_port = match->flow.in_port.ofp_port; - pin->fmd.tun_id = match->flow.tunnel.tun_id; - pin->fmd.tun_src = match->flow.tunnel.ip_src; - pin->fmd.tun_dst = match->flow.tunnel.ip_dst; - pin->fmd.gbp_id = match->flow.tunnel.gbp_id; - pin->fmd.gbp_flags = match->flow.tunnel.gbp_flags; - pin->fmd.metadata = match->flow.metadata; - memcpy(pin->fmd.regs, match->flow.regs, sizeof pin->fmd.regs); - pin->fmd.pkt_mark = match->flow.pkt_mark; -} - enum ofperr ofputil_decode_packet_in(struct ofputil_packet_in *pin, const struct ofp_header *oh) @@ -3335,7 +3317,6 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT13_PACKET_IN || raw == OFPRAW_OFPT12_PACKET_IN) { const struct ofp13_packet_in *opi; - struct match match; int error; size_t packet_in_size; @@ -3346,7 +3327,7 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, } opi = ofpbuf_pull(&b, packet_in_size); - error = oxm_pull_match_loose(&b, &match); + error = oxm_pull_match_loose(&b, &pin->flow_metadata); if (error) { return error; } @@ -3364,7 +3345,8 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, pin->cookie = opi->cookie; } - ofputil_decode_packet_in_finish(pin, &match, &b); + pin->packet = b.data; + pin->packet_len = b.size; } else if (raw == 
OFPRAW_OFPT10_PACKET_IN) { const struct ofp10_packet_in *opi; @@ -3373,12 +3355,14 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, pin->packet = opi->data; pin->packet_len = b.size; - pin->fmd.in_port = u16_to_ofp(ntohs(opi->in_port)); + match_init_catchall(&pin->flow_metadata); + match_set_in_port(&pin->flow_metadata, u16_to_ofp(ntohs(opi->in_port))); pin->reason = opi->reason; pin->buffer_id = ntohl(opi->buffer_id); pin->total_len = ntohs(opi->total_len); } else if (raw == OFPRAW_OFPT11_PACKET_IN) { const struct ofp11_packet_in *opi; + ofp_port_t in_port; enum ofperr error; opi = ofpbuf_pull(&b, sizeof *opi); @@ -3387,21 +3371,22 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, pin->packet_len = b.size; pin->buffer_id = ntohl(opi->buffer_id); - error = ofputil_port_from_ofp11(opi->in_port, &pin->fmd.in_port); + error = ofputil_port_from_ofp11(opi->in_port, &in_port); if (error) { return error; } + match_init_catchall(&pin->flow_metadata); + match_set_in_port(&pin->flow_metadata, in_port); pin->total_len = ntohs(opi->total_len); pin->reason = opi->reason; pin->table_id = opi->table_id; } else if (raw == OFPRAW_NXT_PACKET_IN) { const struct nx_packet_in *npi; - struct match match; int error; npi = ofpbuf_pull(&b, sizeof *npi); - error = nx_pull_match_loose(&b, ntohs(npi->match_len), &match, NULL, - NULL); + error = nx_pull_match_loose(&b, ntohs(npi->match_len), + &pin->flow_metadata, NULL, NULL); if (error) { return error; } @@ -3417,7 +3402,8 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, pin->buffer_id = ntohl(npi->buffer_id); pin->total_len = ntohs(npi->total_len); - ofputil_decode_packet_in_finish(pin, &match, &b); + pin->packet = b.data; + pin->packet_len = b.size; } else { OVS_NOT_REACHED(); } @@ -3425,45 +3411,6 @@ ofputil_decode_packet_in(struct ofputil_packet_in *pin, return 0; } -static void -ofputil_packet_in_to_match(const struct ofputil_packet_in *pin, - struct match *match) -{ - int i; - - match_init_catchall(match); - 
if (pin->fmd.tun_id != htonll(0)) { - match_set_tun_id(match, pin->fmd.tun_id); - } - if (pin->fmd.tun_src != htonl(0)) { - match_set_tun_src(match, pin->fmd.tun_src); - } - if (pin->fmd.tun_dst != htonl(0)) { - match_set_tun_dst(match, pin->fmd.tun_dst); - } - if (pin->fmd.gbp_id != htons(0)) { - match_set_tun_gbp_id(match, pin->fmd.gbp_id); - } - if (pin->fmd.gbp_flags) { - match_set_tun_gbp_flags(match, pin->fmd.gbp_flags); - } - if (pin->fmd.metadata != htonll(0)) { - match_set_metadata(match, pin->fmd.metadata); - } - - for (i = 0; i < FLOW_N_REGS; i++) { - if (pin->fmd.regs[i]) { - match_set_reg(match, i, pin->fmd.regs[i]); - } - } - - if (pin->fmd.pkt_mark != 0) { - match_set_pkt_mark(match, pin->fmd.pkt_mark); - } - - match_set_in_port(match, pin->fmd.in_port); -} - static struct ofpbuf * ofputil_encode_ofp10_packet_in(const struct ofputil_packet_in *pin) { @@ -3474,7 +3421,7 @@ ofputil_encode_ofp10_packet_in(const struct ofputil_packet_in *pin) htonl(0), pin->packet_len); opi = ofpbuf_put_zeros(packet, offsetof(struct ofp10_packet_in, data)); opi->total_len = htons(pin->total_len); - opi->in_port = htons(ofp_to_u16(pin->fmd.in_port)); + opi->in_port = htons(ofp_to_u16(pin->flow_metadata.flow.in_port.ofp_port)); opi->reason = pin->reason; opi->buffer_id = htonl(pin->buffer_id); @@ -3488,17 +3435,13 @@ ofputil_encode_nx_packet_in(const struct ofputil_packet_in *pin) { struct nx_packet_in *npi; struct ofpbuf *packet; - struct match match; size_t match_len; - ofputil_packet_in_to_match(pin, &match); - /* The final argument is just an estimate of the space required. 
*/ packet = ofpraw_alloc_xid(OFPRAW_NXT_PACKET_IN, OFP10_VERSION, - htonl(0), (sizeof(struct flow_metadata) * 2 - + 2 + pin->packet_len)); + htonl(0), NXM_TYPICAL_LEN + 2 + pin->packet_len); ofpbuf_put_zeros(packet, sizeof *npi); - match_len = nx_put_match(packet, &match, 0, 0); + match_len = nx_put_match(packet, &pin->flow_metadata, 0, 0); ofpbuf_put_zeros(packet, 2); ofpbuf_put(packet, pin->packet, pin->packet_len); @@ -3523,7 +3466,7 @@ ofputil_encode_ofp11_packet_in(const struct ofputil_packet_in *pin) htonl(0), pin->packet_len); opi = ofpbuf_put_zeros(packet, sizeof *opi); opi->buffer_id = htonl(pin->buffer_id); - opi->in_port = ofputil_port_to_ofp11(pin->fmd.in_port); + opi->in_port = ofputil_port_to_ofp11(pin->flow_metadata.flow.in_port.ofp_port); opi->in_phy_port = opi->in_port; opi->total_len = htons(pin->total_len); opi->reason = pin->reason; @@ -3539,7 +3482,6 @@ ofputil_encode_ofp12_packet_in(const struct ofputil_packet_in *pin, enum ofputil_protocol protocol) { struct ofp13_packet_in *opi; - struct match match; enum ofpraw packet_in_raw; enum ofp_version packet_in_version; size_t packet_in_size; @@ -3555,14 +3497,12 @@ ofputil_encode_ofp12_packet_in(const struct ofputil_packet_in *pin, packet_in_size = sizeof (struct ofp13_packet_in); } - ofputil_packet_in_to_match(pin, &match); - /* The final argument is just an estimate of the space required. 
*/ packet = ofpraw_alloc_xid(packet_in_raw, packet_in_version, - htonl(0), (sizeof(struct flow_metadata) * 2 - + 2 + pin->packet_len)); + htonl(0), NXM_TYPICAL_LEN + 2 + pin->packet_len); ofpbuf_put_zeros(packet, packet_in_size); - oxm_put_match(packet, &match, ofputil_protocol_to_ofp_version(protocol)); + oxm_put_match(packet, &pin->flow_metadata, + ofputil_protocol_to_ofp_version(protocol)); ofpbuf_put_zeros(packet, 2); ofpbuf_put(packet, pin->packet, pin->packet_len); diff --git a/lib/ofp-util.h b/lib/ofp-util.h index efb5b18f0..549f1183a 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -413,7 +413,7 @@ struct ofputil_packet_in { const void *packet; size_t packet_len; /* Number of bytes in 'packet'. */ size_t total_len; /* Size of packet, pre-truncation. */ - struct flow_metadata fmd; + struct match flow_metadata; /* Identifies a buffer in the switch that contains the full packet, to * allow the controller to reference it later without having to send the diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 495364fe4..1fee86084 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1810,7 +1810,7 @@ schedule_packet_in(struct ofconn *ofconn, struct ofproto_packet_in pin, } else { pin.up.buffer_id = pktbuf_save(ofconn->pktbuf, pin.up.packet, pin.up.packet_len, - pin.up.fmd.in_port); + pin.up.flow_metadata.flow.in_port.ofp_port); } /* Figure out how much of the packet to send. @@ -1823,7 +1823,7 @@ schedule_packet_in(struct ofconn *ofconn, struct ofproto_packet_in pin, /* Make OFPT_PACKET_IN and hand over to packet scheduler. */ pinsched_send(ofconn->schedulers[pin.up.reason == OFPR_NO_MATCH ? 
0 : 1], - pin.up.fmd.in_port, + pin.up.flow_metadata.flow.in_port.ofp_port, ofputil_encode_packet_in(&pin.up, ofconn_get_protocol(ofconn), ofconn->packet_in_format), diff --git a/ofproto/fail-open.c b/ofproto/fail-open.c index c8d553e46..4abc66e48 100644 --- a/ofproto/fail-open.c +++ b/ofproto/fail-open.c @@ -130,7 +130,8 @@ send_bogus_packet_ins(struct fail_open *fo) pin.up.packet = dp_packet_data(&b); pin.up.packet_len = dp_packet_size(&b); pin.up.reason = OFPR_NO_MATCH; - pin.up.fmd.in_port = OFPP_LOCAL; + match_init_catchall(&pin.up.flow_metadata); + match_set_in_port(&pin.up.flow_metadata, OFPP_LOCAL); pin.send_len = dp_packet_size(&b); pin.miss_type = OFPROTO_PACKET_IN_NO_MISS; connmgr_send_packet_in(fo->connmgr, &pin); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 4dc169450..6c5770ad2 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -994,7 +994,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall, pin->up.reason = OFPR_NO_MATCH; pin->up.table_id = 0; pin->up.cookie = OVS_BE64_MAX; - flow_get_metadata(upcall->flow, &pin->up.fmd); + flow_get_metadata(upcall->flow, &pin->up.flow_metadata); pin->send_len = 0; /* Not used for flow table misses. 
*/ pin->miss_type = OFPROTO_PACKET_IN_NO_MISS; ofproto_dpif_send_packet_in(upcall->ofproto, pin); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 6bb85188f..59cd088db 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3425,7 +3425,7 @@ execute_controller_action(struct xlate_ctx *ctx, int len, pin->up.table_id = ctx->table_id; pin->up.cookie = ctx->rule_cookie; - flow_get_metadata(&ctx->xin->flow, &pin->up.fmd); + flow_get_metadata(&ctx->xin->flow, &pin->up.flow_metadata); pin->controller_id = controller_id; pin->send_len = len; diff --git a/tests/ofp-print.at b/tests/ofp-print.at index c2250d08a..39a5bbb66 100644 --- a/tests/ofp-print.at +++ b/tests/ofp-print.at @@ -2702,7 +2702,7 @@ ff ff ff ff ff ff 00 00 00 00 82 82 82 82 82 82 \ 00 55 00 56 00 00 00 00 00 00 00 00 50 02 00 00 \ 31 6d 00 00 00 00 00 00 00 00 \ "], [0], [dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 metadata=0x5a5a5a5a5a5a5a5a reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,metadata=0x5a5a5a5a5a5a5a5a,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86,tcp_flags=syn tcp_csum:316d ]) AT_CLEANUP @@ -2723,7 +2723,7 @@ ff ff ff ff ff ff 00 00 00 00 82 82 82 82 82 82 \ 00 55 00 56 00 00 00 00 00 00 00 00 50 01 00 00 \ 31 6d 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 metadata=0x5a5a5a5a5a5a5a5a reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 
reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,metadata=0x5a5a5a5a5a5a5a5a,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86,tcp_flags=fin tcp_csum:316d 00000000 82 82 82 82 82 82 80 81-81 81 81 81 81 00 00 50 00000010 08 00 45 00 00 28 00 00-00 00 00 06 32 05 53 53 diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 76f2c86a3..fbc6fd9d8 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -1386,28 +1386,28 @@ AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=60 in_port=1 (via action) data_len=60 (unbuffered) tcp,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:0 dnl -NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 in_port=1 reg0=0x1 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 reg0=0x1,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:0 dnl -NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 reg0=0x1,reg1=0x2,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:0 dnl -NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 
total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:0 dnl -NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:1a03 dnl -NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11,tcp_flags=fin tcp_csum:3205 dnl -NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=11,tcp_flags=fin tcp_csum:31b8 dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN 
(xid=0x0): table_id=7 cookie=0x9 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=86,tcp_flags=fin tcp_csum:316d dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) tcp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=86,tcp_flags=fin tcp_csum:316d ]) @@ -1424,28 +1424,28 @@ AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=60 in_port=1 (via action) data_len=60 (unbuffered) udp,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl -NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 in_port=1 reg0=0x1 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 reg0=0x1,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl -NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 reg0=0x1,reg1=0x2,in_port=1 (via action) data_len=64 (unbuffered) 
udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl -NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl -NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:2c37 dnl -NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:4439 dnl -NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) 
udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=11 udp_csum:43ec dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 udp_csum:43a1 dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=64 (unbuffered) udp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 udp_csum:43a1 ]) @@ -1495,28 +1495,28 @@ AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=98 in_port=1 (via action) data_len=98 (unbuffered) sctp,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=102 in_port=1 reg0=0x1 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=102 reg0=0x1,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN 
(xid=0x0): table_id=2 cookie=0x4 total_len=102 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=102 reg0=0x1,reg1=0x2,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=102 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=102 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl -NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=102 in_port=1 tun_id=0x6 
reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=2223 sctp_csum:dd778f5f dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 sctp_csum:62051f56 dnl -NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 reg0=0x1,reg1=0x2,reg2=0x3,reg3=0x4,reg4=0x5,tun_id=0x6,in_port=1 (via action) data_len=102 (unbuffered) sctp,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 sctp_csum:62051f56 ]) diff --git a/tests/ofproto.at b/tests/ofproto.at index 9729a7c81..be1b298a6 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -2679,7 +2679,7 @@ ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl -NXT_PACKET_IN: total_len=14 in_port=CONTROLLER metadata=0xfafafafa5a5a5a5a pkt_mark=0xaa (via action) data_len=14 (unbuffered) +NXT_PACKET_IN: total_len=14 pkt_mark=0xaa,metadata=0xfafafafa5a5a5a5a,in_port=CONTROLLER (via 
action) data_len=14 (unbuffered) vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY: ]) @@ -2707,7 +2707,7 @@ ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl -OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY metadata=0xfafafafa5a5a5a5a (via action) data_len=14 (unbuffered) +OFPT_PACKET_IN (OF1.2): total_len=14 metadata=0xfafafafa5a5a5a5a,in_port=ANY (via action) data_len=14 (unbuffered) vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY (OF1.2): ]) @@ -2735,7 +2735,7 @@ ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl -OFPT_PACKET_IN (OF1.3): total_len=14 in_port=ANY metadata=0x6b (via action) data_len=14 (unbuffered) +OFPT_PACKET_IN (OF1.3): total_len=14 metadata=0x6b,in_port=ANY (via action) data_len=14 (unbuffered) vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY (OF1.3): ]) @@ -2762,7 +2762,7 @@ ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl -OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY tun_id=0x1020304 tun_src=127.0.0.1 tun_dst=192.168.0.1 (via action) data_len=14 (unbuffered) +OFPT_PACKET_IN (OF1.2): total_len=14 tun_id=0x1020304,tun_src=127.0.0.1,tun_dst=192.168.0.1,in_port=ANY (via action) data_len=14 (unbuffered) vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY (OF1.2): ]) From 6d9e94b522dc4b11f724365f1ca9cbf8669627da Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Thu, 4 Jun 2015 13:45:55 -0700 Subject: [PATCH 117/146] tunneling: Fix a tunnel name display bug Currently, 'ovs-appctl tnl/ports/show' command won't display gre port name correctly. 
This happens because netdev_vport_get_dpif_port() will not always set the 'namebuf' it receives; the name given by its return value should be used instead. Found by inspection. Also extend the test case to cover this command. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ofproto/ofproto-dpif.c | 14 ++++++++------ tests/tunnel-push-pop.at | 8 ++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 22e5d5f34..c4cafe00b 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1626,6 +1626,7 @@ port_construct(struct ofport *port_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const struct netdev *netdev = port->up.netdev; char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; + const char *dp_port_name; struct dpif_port dpif_port; int error; @@ -1659,9 +1660,8 @@ port_construct(struct ofport *port_) return 0; } - error = dpif_port_query_by_name(ofproto->backer->dpif, - netdev_vport_get_dpif_port(netdev, namebuf, - sizeof namebuf), + dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); + error = dpif_port_query_by_name(ofproto->backer->dpif, dp_port_name, &dpif_port); if (error) { return error; @@ -1672,7 +1672,7 @@ port_construct(struct ofport *port_) if (netdev_get_tunnel_config(netdev)) { atomic_count_inc(&ofproto->backer->tnl_count); error = tnl_port_add(port, port->up.netdev, port->odp_port, - ovs_native_tunneling_is_on(ofproto), namebuf); + ovs_native_tunneling_is_on(ofproto), dp_port_name); if (error) { atomic_count_dec(&ofproto->backer->tnl_count); dpif_port_destroy(&dpif_port); @@ -1775,6 +1775,7 @@ port_modified(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; + const char *dp_port_name; struct netdev *netdev = port->up.netdev; if (port->bundle && port->bundle->bond) { @@ -1792,13 +1793,14 @@ port_modified(struct ofport *port_) ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm, port->lldp,
port->up.pp.hw_addr); - netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); + dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); if (port->is_tunnel) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); if (tnl_port_reconfigure(port, netdev, port->odp_port, - ovs_native_tunneling_is_on(ofproto), namebuf)) { + ovs_native_tunneling_is_on(ofproto), + dp_port_name)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } } diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 877e417ad..b9d3572b9 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -45,6 +45,14 @@ IP MAC Bridge 1.1.2.93 f8:bc:12:44:34:b7 br0 ]) +AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl +Listening ports: +genev_sys_6081 (6081) : eth_type(0x0800),ipv4(src=1.1.2.92,proto=17,frag=no),udp(dst=6081) +gre_sys (3) : eth_type(0x0800),ipv4(src=1.1.2.92,proto=47,frag=no) +vxlan_sys_4789 (4789) : eth_type(0x0800),ipv4(src=1.1.2.92,proto=17,frag=no),udp(dst=4789) +vxlan_sys_4789 (4789) : eth_type(0x0800),ipv4(src=1.1.2.93,proto=17,frag=no),udp(dst=4789) +]) + dnl Check VXLAN tunnel pop AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=51283,dst=4789)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], From 36247a75e9990a7f7ee3c5124852395f33ba1d38 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Sun, 15 Mar 2015 16:31:41 -0700 Subject: [PATCH 118/146] ovsdb-monitor: refactor ovsdb_monitor_create() Add ovsdb_monitor_add_jsonrpc_monitor(). This change will allow ovsdb_monitor to be reference counted. 
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/monitor.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 9541f3e76..18d5e3ae2 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -230,12 +230,22 @@ ovsdb_monitor_row_destroy(const struct ovsdb_monitor_table *mt, } } +static void +ovsdb_monitor_add_jsonrpc_monitor(struct ovsdb_monitor *dbmon, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) +{ + struct jsonrpc_monitor_node *jm; + + jm = xzalloc(sizeof *jm); + jm->jsonrpc_monitor = jsonrpc_monitor; + list_push_back(&dbmon->jsonrpc_monitors, &jm->node); +} + struct ovsdb_monitor * ovsdb_monitor_create(struct ovsdb *db, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) { struct ovsdb_monitor *dbmon; - struct jsonrpc_monitor_node *jm; dbmon = xzalloc(sizeof *dbmon); @@ -246,10 +256,7 @@ ovsdb_monitor_create(struct ovsdb *db, dbmon->n_transactions = 0; shash_init(&dbmon->tables); - jm = xzalloc(sizeof *jm); - jm->jsonrpc_monitor = jsonrpc_monitor; - list_push_back(&dbmon->jsonrpc_monitors, &jm->node); - + ovsdb_monitor_add_jsonrpc_monitor(dbmon, jsonrpc_monitor); return dbmon; } From 6e5a9216f3a16c2de508dbaf960c643219d45fa6 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 16 Mar 2015 03:03:20 -0700 Subject: [PATCH 119/146] ovsdb-monitor: allow multiple jsonrpc monitors to share a single ovsdb monitor Store ovsdb monitors in a global hmap. If a newly created ovsdb monitor object monitors the same tables and columns as an existing one, the existing monitor will be reused. With this patch, jsonrpc monitors and ovsdb monitors now have an N:1 mapping. The goals are to: 1) Reduce the cost of maintaining duplicated monitors. 2) Allow a JSON cache to be created for the same updates. The JSON cache will be introduced in the following patch.
Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 9 +++ ovsdb/monitor.c | 137 ++++++++++++++++++++++++++++++++++++----- ovsdb/monitor.h | 10 ++- 3 files changed, 141 insertions(+), 15 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index efd83b8f0..0f7932e8a 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1157,6 +1157,7 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, const struct json *request_id) { struct ovsdb_jsonrpc_monitor *m = NULL; + struct ovsdb_monitor *dbmon = NULL; struct json *monitor_id, *monitor_requests; struct ovsdb_error *error = NULL; struct shash_node *node; @@ -1234,6 +1235,14 @@ ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, } } + dbmon = ovsdb_monitor_add(m->dbmon); + if (dbmon != m->dbmon) { + /* Found an exisiting dbmon, reuse the current one. */ + ovsdb_monitor_remove_jsonrpc_monitor(m->dbmon, m); + ovsdb_monitor_add_jsonrpc_monitor(dbmon, m); + m->dbmon = dbmon; + } + ovsdb_monitor_get_initial(m->dbmon); json = ovsdb_jsonrpc_monitor_compose_update(m, true); json = json ? json : json_object_create(); diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 18d5e3ae2..fb45ca6f7 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -30,6 +30,7 @@ #include "simap.h" #include "hash.h" #include "table.h" +#include "hash.h" #include "timeval.h" #include "transaction.h" #include "jsonrpc-server.h" @@ -38,6 +39,7 @@ static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; +static struct hmap ovsdb_monitors = HMAP_INITIALIZER(&ovsdb_monitors); /* Backend monitor. * @@ -51,6 +53,7 @@ struct ovsdb_monitor { struct ovs_list jsonrpc_monitors; /* Contains "jsonrpc_monitor_node"s. */ struct ovsdb *db; uint64_t n_transactions; /* Count number of committed transactions. */ + struct hmap_node hmap_node; /* Elements within ovsdb_monitors. 
*/ }; struct jsonrpc_monitor_node { @@ -108,8 +111,8 @@ struct ovsdb_monitor_table { }; static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon); -static void ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, - uint64_t next_txn); +static struct ovsdb_monitor_changes * ovsdb_monitor_table_add_changes( + struct ovsdb_monitor_table *mt, uint64_t next_txn); static struct ovsdb_monitor_changes *ovsdb_monitor_table_find_changes( struct ovsdb_monitor_table *mt, uint64_t unflushed); static void ovsdb_monitor_changes_destroy( @@ -230,7 +233,7 @@ ovsdb_monitor_row_destroy(const struct ovsdb_monitor_table *mt, } } -static void +void ovsdb_monitor_add_jsonrpc_monitor(struct ovsdb_monitor *dbmon, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor) { @@ -255,6 +258,7 @@ ovsdb_monitor_create(struct ovsdb *db, dbmon->db = db; dbmon->n_transactions = 0; shash_init(&dbmon->tables); + hmap_node_nullify(&dbmon->hmap_node); ovsdb_monitor_add_jsonrpc_monitor(dbmon, jsonrpc_monitor); return dbmon; @@ -321,7 +325,7 @@ ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *m, return NULL; } -static void +static struct ovsdb_monitor_changes * ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, uint64_t next_txn) { @@ -334,6 +338,8 @@ ovsdb_monitor_table_add_changes(struct ovsdb_monitor_table *mt, changes->n_refs = 1; hmap_init(&changes->rows); hmap_insert(&mt->changes, &changes->hmap_node, hash_uint64(next_txn)); + + return changes; }; static struct ovsdb_monitor_changes * @@ -362,7 +368,6 @@ ovsdb_monitor_table_untrack_changes(struct ovsdb_monitor_table *mt, struct ovsdb_monitor_changes *changes = ovsdb_monitor_table_find_changes(mt, transaction); if (changes) { - ovs_assert(changes->transaction == transaction); if (--changes->n_refs == 0) { hmap_remove(&mt->changes, &changes->hmap_node); ovsdb_monitor_changes_destroy(changes); @@ -551,9 +556,6 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, snprintf(uuid, sizeof uuid, UUID_FMT, 
UUID_ARGS(&row->uuid)); json_object_put(table_json, uuid, row_json); } - - hmap_remove(&changes->rows, &row->hmap_node); - ovsdb_monitor_row_destroy(mt, row); } ovsdb_monitor_table_untrack_changes(mt, prev_txn); @@ -669,13 +671,16 @@ ovsdb_monitor_get_initial(const struct ovsdb_monitor *dbmon) if (mt->select & OJMS_INITIAL) { struct ovsdb_row *row; + struct ovsdb_monitor_changes *changes; - if (hmap_is_empty(&mt->changes)) { - ovsdb_monitor_table_add_changes(mt, 0); - } - - HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { - ovsdb_monitor_change_cb(NULL, row, NULL, &aux); + changes = ovsdb_monitor_table_find_changes(mt, 0); + if (!changes) { + changes = ovsdb_monitor_table_add_changes(mt, 0); + HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) { + ovsdb_monitor_changes_update(NULL, row, mt, changes); + } + } else { + changes->n_refs++; } } } @@ -687,6 +692,11 @@ ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, { struct jsonrpc_monitor_node *jm; + if (list_is_empty(&dbmon->jsonrpc_monitors)) { + ovsdb_monitor_destroy(dbmon); + return; + } + /* Find and remove the jsonrpc monitor from the list. 
*/ LIST_FOR_EACH(jm, node, &dbmon->jsonrpc_monitors) { if (jm->jsonrpc_monitor == jsonrpc_monitor) { @@ -706,6 +716,101 @@ ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, OVS_NOT_REACHED(); } +static bool +ovsdb_monitor_table_equal(const struct ovsdb_monitor_table *a, + const struct ovsdb_monitor_table *b) +{ + size_t i; + + if ((a->table != b->table) || + (a->select != b->select) || + (a->n_columns != b->n_columns)) { + return false; + } + + for (i = 0; i < a->n_columns; i++) { + if ((a->columns[i].column != b->columns[i].column) || + (a->columns[i].select != b->columns[i].select)) { + return false; + } + } + + return true; +} + +static bool +ovsdb_monitor_equal(const struct ovsdb_monitor *a, + const struct ovsdb_monitor *b) +{ + struct shash_node *node; + + if (shash_count(&a->tables) != shash_count(&b->tables)) { + return false; + } + + SHASH_FOR_EACH(node, &a->tables) { + const struct ovsdb_monitor_table *mta = node->data; + const struct ovsdb_monitor_table *mtb; + + mtb = shash_find_data(&b->tables, node->name); + if (!mtb) { + return false; + } + + if (!ovsdb_monitor_table_equal(mta, mtb)) { + return false; + } + } + + return true; +} + +static size_t +ovsdb_monitor_hash(const struct ovsdb_monitor *dbmon, size_t basis) +{ + const struct shash_node **nodes; + size_t i, j, n; + + nodes = shash_sort(&dbmon->tables); + n = shash_count(&dbmon->tables); + + for (i = 0; i < n; i++) { + struct ovsdb_monitor_table *mt = nodes[i]->data; + + basis = hash_pointer(mt->table, basis); + basis = hash_3words(mt->select, mt->n_columns, basis); + + for (j = 0; j < mt->n_columns; j++) { + basis = hash_pointer(mt->columns[j].column, basis); + basis = hash_2words(mt->columns[j].select, basis); + } + } + free(nodes); + + return basis; +} + +struct ovsdb_monitor * +ovsdb_monitor_add(struct ovsdb_monitor *new_dbmon) +{ + struct ovsdb_monitor *dbmon; + size_t hash; + + /* New_dbmon should be associated with only one jsonrpc + * connections. 
*/ + ovs_assert(list_is_singleton(&new_dbmon->jsonrpc_monitors)); + + hash = ovsdb_monitor_hash(new_dbmon, 0); + HMAP_FOR_EACH_WITH_HASH(dbmon, hmap_node, hash, &ovsdb_monitors) { + if (ovsdb_monitor_equal(dbmon, new_dbmon)) { + return dbmon; + } + } + + hmap_insert(&ovsdb_monitors, &new_dbmon->hmap_node, hash); + return new_dbmon; +} + static void ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) { @@ -713,6 +818,10 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) list_remove(&dbmon->replica.node); + if (!hmap_node_is_null(&dbmon->hmap_node)) { + hmap_remove(&ovsdb_monitors, &dbmon->hmap_node); + } + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; struct ovsdb_monitor_changes *changes, *next; diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index 5fcb346e0..dc2fc1af6 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -28,7 +28,15 @@ enum ovsdb_monitor_selection { struct ovsdb_monitor *ovsdb_monitor_create(struct ovsdb *db, - struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); + +struct ovsdb_monitor *ovsdb_monitor_add(struct ovsdb_monitor *dbmon); + +void ovsdb_monitor_add_jsonrpc_monitor(struct ovsdb_monitor *dbmon, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); + +void ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, + struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); void ovsdb_monitor_remove_jsonrpc_monitor(struct ovsdb_monitor *dbmon, struct ovsdb_jsonrpc_monitor *jsonrpc_monitor); From 4c2809787cdbc774428253e2596c15d9daa76898 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 16 Mar 2015 15:45:27 -0700 Subject: [PATCH 120/146] ovsdb-monitor: add json cache Although multiple jsonrpc monitors can share the same ovsdb monitor, each change still needs to be translated into a json object from scratch. This can be wasteful if multiple jsonrpc monitors are interested in the same changes.
Json cache improves this by keeping a copy of the json object generated for transaction X to the current transaction. When jsonrpc is interested in a change, the cache is searched first; if a json object is found, a copy of it is handed back, skipping the regeneration process. Any commit to the monitor will empty the cache. This can be further optimized to not throw away the cache if the updated tables and columns are not being monitored. Signed-off-by: Andy Zhou Acked-by: Ben Pfaff --- ovsdb/jsonrpc-server.c | 2 +- ovsdb/monitor.c | 123 +++++++++++++++++++++++++++++++++-------- ovsdb/monitor.h | 2 +- 3 files changed, 103 insertions(+), 24 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 0f7932e8a..fffcb731d 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -1294,7 +1294,7 @@ static struct json * ovsdb_jsonrpc_monitor_compose_update(struct ovsdb_jsonrpc_monitor *m, bool initial) { - return ovsdb_monitor_compose_update(m->dbmon, initial, &m->unflushed); + return ovsdb_monitor_get_update(m->dbmon, initial, &m->unflushed); } static bool diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index fb45ca6f7..257959c50 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -54,6 +54,15 @@ struct ovsdb_monitor { struct ovsdb *db; uint64_t n_transactions; /* Count number of committed transactions. */ struct hmap_node hmap_node; /* Elements within ovsdb_monitors. */ + struct hmap json_cache; /* Contains "ovsdb_monitor_json_cache_node"s.*/ +}; + +/* A json object of updates between 'from_txn' and 'dbmon->n_transactions' + * inclusive. */ +struct ovsdb_monitor_json_cache_node { + struct hmap_node hmap_node; /* Elements in json cache.
*/ + uint64_t from_txn; + struct json *json; /* Null, or a cloned of json */ }; struct jsonrpc_monitor_node { @@ -120,6 +129,50 @@ static void ovsdb_monitor_changes_destroy( static void ovsdb_monitor_table_track_changes(struct ovsdb_monitor_table *mt, uint64_t unflushed); +static struct ovsdb_monitor_json_cache_node * +ovsdb_monitor_json_cache_search(const struct ovsdb_monitor *dbmon, + uint64_t from_txn) +{ + struct ovsdb_monitor_json_cache_node *node; + uint32_t hash = hash_uint64(from_txn); + + HMAP_FOR_EACH_WITH_HASH(node, hmap_node, hash, &dbmon->json_cache) { + if (node->from_txn == from_txn) { + return node; + } + } + + return NULL; +} + +static void +ovsdb_monitor_json_cache_insert(struct ovsdb_monitor *dbmon, + uint64_t from_txn, struct json *json) +{ + struct ovsdb_monitor_json_cache_node *node; + uint32_t hash; + + node = xmalloc(sizeof *node); + + hash = hash_uint64(from_txn); + node->from_txn = from_txn; + node->json = json ? json_clone(json) : NULL; + + hmap_insert(&dbmon->json_cache, &node->hmap_node, hash); +} + +static void +ovsdb_monitor_json_cache_flush(struct ovsdb_monitor *dbmon) +{ + struct ovsdb_monitor_json_cache_node *node, *next; + + HMAP_FOR_EACH_SAFE(node, next, hmap_node, &dbmon->json_cache) { + hmap_remove(&dbmon->json_cache, &node->hmap_node); + json_destroy(node->json); + free(node); + } +} + static int compare_ovsdb_monitor_column(const void *a_, const void *b_) { @@ -259,6 +312,7 @@ ovsdb_monitor_create(struct ovsdb *db, dbmon->n_transactions = 0; shash_init(&dbmon->tables); hmap_node_nullify(&dbmon->hmap_node); + hmap_init(&dbmon->json_cache); ovsdb_monitor_add_jsonrpc_monitor(dbmon, jsonrpc_monitor); return dbmon; @@ -488,29 +542,16 @@ ovsdb_monitor_compose_row_update( } /* Constructs and returns JSON for a <table-updates> object (as described in - * RFC 7047) for all the outstanding changes within 'monitor', and deletes all - * the outstanding changes from 'monitor'. Returns NULL if no update needs to - * be sent.
- * - * The caller should specify 'initial' as true if the returned JSON is going to - * be used as part of the initial reply to a "monitor" request, false if it is - * going to be used as part of an "update" notification. - * - * 'unflushed' should point to value that is the transaction ID that did - * was not updated. The update contains changes between - * ['unflushed, ovsdb->n_transcations]. Before the function returns, this - * value will be updated to ovsdb->n_transactions + 1, ready for the next - * update. */ -struct json * -ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, - bool initial, uint64_t *unflushed) + * RFC 7047) for all the outstanding changes within 'monitor', starting from + * 'transaction'. */ +static struct json* +ovsdb_monitor_compose_update(struct ovsdb_monitor *dbmon, + bool initial, uint64_t transaction) { struct shash_node *node; unsigned long int *changed; struct json *json; size_t max_columns; - uint64_t prev_txn = *unflushed; - uint64_t next_txn = dbmon->n_transactions + 1; max_columns = 0; SHASH_FOR_EACH (node, &dbmon->tables) { @@ -527,9 +568,8 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, struct ovsdb_monitor_changes *changes; struct json *table_json = NULL; - changes = ovsdb_monitor_table_find_changes(mt, prev_txn); + changes = ovsdb_monitor_table_find_changes(mt, transaction); if (!changes) { - ovsdb_monitor_table_track_changes(mt, next_txn); continue; } @@ -557,13 +597,48 @@ ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, json_object_put(table_json, uuid, row_json); } } + } + free(changed); + + return json; +} + +/* Returns JSON for a <table-updates> object (as described in RFC 7047) + * for all the outstanding changes within 'monitor' that starts from + * '*unflushed' transaction id.
+ * + * The caller should specify 'initial' as true if the returned JSON is going to + * be used as part of the initial reply to a "monitor" request, false if it is + * going to be used as part of an "update" notification. */ +struct json * +ovsdb_monitor_get_update(struct ovsdb_monitor *dbmon, + bool initial, uint64_t *unflushed) +{ + struct ovsdb_monitor_json_cache_node *cache_node; + struct shash_node *node; + struct json *json; + uint64_t prev_txn = *unflushed; + uint64_t next_txn = dbmon->n_transactions + 1; + + /* Return a clone of cached json if one exists. Otherwise, + * generate a new one and add it to the cache. */ + cache_node = ovsdb_monitor_json_cache_search(dbmon, prev_txn); + if (cache_node) { + json = cache_node->json ? json_clone(cache_node->json) : NULL; + } else { + json = ovsdb_monitor_compose_update(dbmon, initial, prev_txn); + ovsdb_monitor_json_cache_insert(dbmon, prev_txn, json); + } + + /* Maintain transaction id of 'changes'. */ + SHASH_FOR_EACH (node, &dbmon->tables) { + struct ovsdb_monitor_table *mt = node->data; ovsdb_monitor_table_untrack_changes(mt, prev_txn); ovsdb_monitor_table_track_changes(mt, next_txn); } - *unflushed = next_txn; - free(changed); + return json; } @@ -822,6 +897,9 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) hmap_remove(&ovsdb_monitors, &dbmon->hmap_node); } + ovsdb_monitor_json_cache_flush(dbmon); + hmap_destroy(&dbmon->json_cache); + SHASH_FOR_EACH (node, &dbmon->tables) { struct ovsdb_monitor_table *mt = node->data; struct ovsdb_monitor_changes *changes, *next; @@ -845,6 +923,7 @@ ovsdb_monitor_commit(struct ovsdb_replica *replica, struct ovsdb_monitor *m = ovsdb_monitor_cast(replica); struct ovsdb_monitor_aux aux; + ovsdb_monitor_json_cache_flush(m); ovsdb_monitor_init_aux(&aux, m); ovsdb_txn_for_each_change(txn, ovsdb_monitor_change_cb, &aux); m->n_transactions++; diff --git a/ovsdb/monitor.h b/ovsdb/monitor.h index dc2fc1af6..a8e531012 100644 --- a/ovsdb/monitor.h +++ b/ovsdb/monitor.h @@ -54,7 
+54,7 @@ const char * OVS_WARN_UNUSED_RESULT ovsdb_monitor_table_check_duplicates(struct ovsdb_monitor *, const struct ovsdb_table *); -struct json *ovsdb_monitor_compose_update(const struct ovsdb_monitor *dbmon, +struct json *ovsdb_monitor_get_update(struct ovsdb_monitor *dbmon, bool initial, uint64_t *unflushed_transaction); void ovsdb_monitor_table_add_select(struct ovsdb_monitor *dbmon, From 4c0e587ce4408ff8181f7b094a3ef13ad273f20b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 8 Jun 2015 13:16:07 -0700 Subject: [PATCH 121/146] flow: Add 'const' qualifiers in flow extraction. Signed-off-by: Ben Pfaff Acked-by: Daniele Di Proietto --- lib/flow.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index 7dd0dfd80..d2dcc460e 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -89,9 +89,9 @@ BUILD_ASSERT_DECL(offsetof(struct flow, tp_src) + 2 * must contain at least 'size' bytes of data. Returns the first byte of data * removed. */ static inline const void * -data_pull(void **datap, size_t *sizep, size_t size) +data_pull(const void **datap, size_t *sizep, size_t size) { - char *data = (char *)*datap; + const char *data = *datap; *datap = data + size; *sizep -= size; return data; @@ -101,7 +101,7 @@ data_pull(void **datap, size_t *sizep, size_t size) * the head end of '*datap' and returns the first byte removed. Otherwise, * returns a null pointer without modifying '*datap'. */ static inline const void * -data_try_pull(void **datap, size_t *sizep, size_t size) +data_try_pull(const void **datap, size_t *sizep, size_t size) { return OVS_LIKELY(*sizep >= size) ? data_pull(datap, sizep, size) : NULL; } @@ -261,7 +261,7 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " /* Pulls the MPLS headers at '*datap' and returns the count of them. 
*/ static inline int -parse_mpls(void **datap, size_t *sizep) +parse_mpls(const void **datap, size_t *sizep) { const struct mpls_hdr *mh; int count = 0; @@ -276,7 +276,7 @@ parse_mpls(void **datap, size_t *sizep) } static inline ovs_be16 -parse_vlan(void **datap, size_t *sizep) +parse_vlan(const void **datap, size_t *sizep) { const struct eth_header *eth = *datap; @@ -298,7 +298,7 @@ parse_vlan(void **datap, size_t *sizep) } static inline ovs_be16 -parse_ethertype(void **datap, size_t *sizep) +parse_ethertype(const void **datap, size_t *sizep) { const struct llc_snap_header *llc; ovs_be16 proto; @@ -331,7 +331,7 @@ parse_ethertype(void **datap, size_t *sizep) } static inline bool -parse_icmpv6(void **datap, size_t *sizep, const struct icmp6_hdr *icmp, +parse_icmpv6(const void **datap, size_t *sizep, const struct icmp6_hdr *icmp, const struct in6_addr **nd_target, uint8_t arp_buf[2][ETH_ADDR_LEN]) { @@ -423,11 +423,11 @@ void miniflow_extract(struct dp_packet *packet, struct miniflow *dst) { const struct pkt_metadata *md = &packet->md; - void *data = dp_packet_data(packet); + const void *data = dp_packet_data(packet); size_t size = dp_packet_size(packet); uint64_t *values = miniflow_values(dst); struct mf_ctx mf = { 0, values, values + FLOW_U64S }; - char *l2; + const char *l2; ovs_be16 dl_type; uint8_t nw_frag, nw_tos, nw_ttl, nw_proto; From d426c66caddf195ab546ec0f5565b530d215e1f9 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 9 Jun 2015 15:24:33 -0700 Subject: [PATCH 122/146] ofproto: Fix memory leak in ofproto_rule_delete(). Commit 401aa90e33be (ofproto: Fix memory leak in flow deletion.) fixed the memory leak when a rule is deleted, but failed to do the same when all rules in a bridge are deleted just before the bridge itself is deleted. This patch adds the necessary unref to ofproto_rule_delete(). 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 0a1d03250..029ff3757 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1443,6 +1443,7 @@ ofproto_rule_delete(struct ofproto *ofproto, struct rule *rule) ovs_mutex_lock(&ofproto_mutex); oftable_remove_rule(rule); ofproto->ofproto_class->rule_delete(rule); + ofproto_rule_unref(rule); ovs_mutex_unlock(&ofproto_mutex); } @@ -4842,7 +4843,9 @@ delete_flows__(const struct rule_collection *rules, if (next_table == rule->table_id) { classifier_defer(cls); } - classifier_remove(cls, &rule->cr); + if (!classifier_remove(cls, &rule->cr)) { + OVS_NOT_REACHED(); + } if (next_table != rule->table_id) { classifier_publish(cls); } @@ -7364,6 +7367,8 @@ oftable_remove_rule(struct rule *rule) if (classifier_remove(cls, &rule->cr)) { ofproto_rule_remove__(rule->ofproto, rule); + } else { + OVS_NOT_REACHED(); } } From 3139b8e909d4fdd0a32fa8e3c556966bc4f63ddc Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 9 Jun 2015 15:24:33 -0700 Subject: [PATCH 123/146] odp-util: Simplify parsing function for GCC. GCC 4.7.2 -O3 flagged potential use before initialization for the 'id' and 'id_mask' being scanned in scan_vxlan_gbp(). For the 'id' this was a real possibility, but for the 'id_mask' it seems to be a false positive in gcc analysis. Simplify scan_vxlan_gbp() to fix this.
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/odp-util.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 3204d16ae..dee85c850 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2890,14 +2890,12 @@ static int scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) { const char *s_base = s; - ovs_be16 id, id_mask; - uint8_t flags, flags_mask; + ovs_be16 id = 0, id_mask = 0; + uint8_t flags = 0, flags_mask = 0; if (!strncmp(s, "id=", 3)) { s += 3; s += scan_be16(s, &id, mask ? &id_mask : NULL); - } else if (mask) { - memset(&id_mask, 0, sizeof id_mask); } if (s[0] == ',') { @@ -2906,8 +2904,6 @@ scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) if (!strncmp(s, "flags=", 6)) { s += 6; s += scan_u8(s, &flags, mask ? &flags_mask : NULL); - } else if (mask) { - memset(&flags_mask, 0, sizeof flags_mask); } if (!strncmp(s, "))", 2)) { From 19ba142220fb109583c181039fb37d8f7aa6ff67 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 9 Jun 2015 15:24:33 -0700 Subject: [PATCH 124/146] ofp-parse: Use F_OUT_PORT when parsing. We set this field flag for the cases when an out_port should be parsed, but failed to make use of it. Two test cases needed to be updated due to use of out_port in flow add, while out_port is legal for flow deletes only. 
Suggested-by: Ben Pfaff Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/ofp-parse.c | 2 +- tests/ovs-ofctl.at | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 856044dbb..6125f27b6 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -353,7 +353,7 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, if (fm->table_id != 0xff) { *usable_protocols &= OFPUTIL_P_TID; } - } else if (!strcmp(name, "out_port")) { + } else if (fields & F_OUT_PORT && !strcmp(name, "out_port")) { if (!ofputil_port_from_string(value, &fm->out_port)) { error = xasprintf("%s is not a valid OpenFlow port", value); diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 42be8f065..1e128278d 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -140,7 +140,7 @@ AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (OpenFlow 1.0)]) AT_DATA([flows.txt], [[ # comment -tcp,tp_src=123,out_port=5,actions=flood +tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 @@ -159,7 +159,7 @@ AT_CHECK([ovs-ofctl parse-flows flows.txt AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: any chosen protocol: OpenFlow10-table_id -OFPT_FLOW_MOD: ADD tcp,tp_src=123 out_port:5 actions=FLOOD +OFPT_FLOW_MOD: ADD tcp,tp_src=123 actions=FLOOD OFPT_FLOW_MOD: ADD in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop OFPT_FLOW_MOD: ADD udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0 OFPT_FLOW_MOD: ADD tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 @@ -177,7 +177,7 @@ AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (OpenFlow 1.1)]) AT_DATA([flows.txt], [[ # comment -tcp,tp_src=123,out_port=5,actions=flood +tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop udp dl_vlan_pcp=7 
idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 @@ -196,7 +196,7 @@ AT_CHECK([ovs-ofctl --protocols OpenFlow11 parse-flows flows.txt AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: any chosen protocol: OpenFlow11 -OFPT_FLOW_MOD (OF1.1): ADD tcp,tp_src=123 out_port:5 actions=FLOOD +OFPT_FLOW_MOD (OF1.1): ADD tcp,tp_src=123 actions=FLOOD OFPT_FLOW_MOD (OF1.1): ADD in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop OFPT_FLOW_MOD (OF1.1): ADD udp,dl_vlan_pcp=7 idle:5 actions=pop_vlan,output:0 OFPT_FLOW_MOD (OF1.1): ADD tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 From 78145f6edb62f6d6ec3a99f506dff3d617456f9a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 9 Jun 2015 13:35:29 -0700 Subject: [PATCH 125/146] dpif: Always generate RFC4122 UUIDs for UFID. This patch sacrifices a few bits of hash quality from the 128-bit unique flow identifiers to make the UFIDs RFC4122-conformant as per the version 4 (random) UUID spec. Given that the 128-bit space is already quite large, this should not affect the spread of UFIDs in any meaningful way for hashing. Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/dpif.c | 2 ++ lib/uuid.c | 6 ++++++ lib/uuid.h | 1 + 3 files changed, 9 insertions(+) diff --git a/lib/dpif.c b/lib/dpif.c index aa5e64e3f..388ca734a 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -46,6 +46,7 @@ #include "tnl-arp-cache.h" #include "tnl-ports.h" #include "util.h" +#include "uuid.h" #include "valgrind.h" #include "openvswitch/vlog.h" @@ -852,6 +853,7 @@ dpif_flow_hash(const struct dpif *dpif OVS_UNUSED, ovsthread_once_done(&once); } hash_bytes128(key, key_len, secret, hash); + uuid_set_bits_v4((struct uuid *)hash); } /* Deletes all flows from 'dpif'. 
Returns 0 if successful, otherwise a diff --git a/lib/uuid.c b/lib/uuid.c index df1206e1c..0f2a58e5a 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -98,6 +98,12 @@ uuid_generate(struct uuid *uuid) /* AES output is exactly 16 bytes, so we encrypt directly into 'uuid'. */ aes128_encrypt(&key, copy, uuid); + uuid_set_bits_v4(uuid); +} + +void +uuid_set_bits_v4(struct uuid *uuid) +{ /* Set bits to indicate a random UUID. See RFC 4122 section 4.4. */ uuid->parts[2] &= ~0xc0000000; uuid->parts[2] |= 0x80000000; diff --git a/lib/uuid.h b/lib/uuid.h index 37e01d091..8c6f2f127 100644 --- a/lib/uuid.h +++ b/lib/uuid.h @@ -78,5 +78,6 @@ bool uuid_is_zero(const struct uuid *); int uuid_compare_3way(const struct uuid *, const struct uuid *); bool uuid_from_string(struct uuid *, const char *); bool uuid_from_string_prefix(struct uuid *, const char *); +void uuid_set_bits_v4(struct uuid *); #endif /* uuid.h */ From 10e92b4f7a1db4bd88452cdc3aaec5fd46c4f7fe Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 29 May 2015 17:08:45 -0700 Subject: [PATCH 126/146] odp-util: Reuse UUID marshalling for UFID. Unique flow identifiers are really a UUID of sorts, so it makes sense to reuse the UUID string representations for UFID. Suggested-by: Ben Pfaff Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- lib/odp-util.c | 36 ++++++++++-------------------------- tests/dpif-netdev.at | 2 +- tests/ofproto-dpif.at | 2 +- tests/ofproto-macros.at | 2 +- 4 files changed, 13 insertions(+), 29 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index dee85c850..f99683a24 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -37,6 +37,7 @@ #include "timeval.h" #include "unaligned.h" #include "util.h" +#include "uuid.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(odp_util); @@ -51,8 +52,6 @@ VLOG_DEFINE_THIS_MODULE(odp_util); * from another. 
*/ static const char *delimiters = ", \t\r\n"; -static const char *hex_chars = "0123456789abcdefABCDEF"; - struct attr_len_tbl { int len; const struct attr_len_tbl *next; @@ -2354,24 +2353,12 @@ odp_ufid_from_string(const char *s_, ovs_u128 *ufid) const char *s = s_; if (ovs_scan(s, "ufid:")) { - size_t n; - s += 5; - if (ovs_scan(s, "0x")) { - s += 2; - } - n = strspn(s, hex_chars); - if (n != 32) { + if (!uuid_from_string_prefix((struct uuid *)ufid, s)) { return -EINVAL; } - - if (!ovs_scan(s, "%16"SCNx64"%16"SCNx64, &ufid->u64.hi, - &ufid->u64.lo)) { - return -EINVAL; - } - s += n; - s += strspn(s, delimiters); + s += UUID_LEN; return s - s_; } @@ -2382,8 +2369,7 @@ odp_ufid_from_string(const char *s_, ovs_u128 *ufid) void odp_format_ufid(const ovs_u128 *ufid, struct ds *ds) { - ds_put_format(ds, "ufid:%016"PRIx64"%016"PRIx64, ufid->u64.hi, - ufid->u64.lo); + ds_put_format(ds, "ufid:"UUID_FMT, UUID_ARGS((struct uuid *)ufid)); } /* Appends to 'ds' a string representation of the 'key_len' bytes of @@ -3214,15 +3200,13 @@ static int parse_odp_key_mask_attr(const char *s, const struct simap *port_names, struct ofpbuf *key, struct ofpbuf *mask) { - if (!strncmp(s, "ufid:", 5)) { - const char *start = s; + ovs_u128 ufid; + int len; - /* Skip UFID. */ - s += 5; - s += strspn(s, hex_chars); - s += strspn(s, delimiters); - - return s - start; + /* Skip UFID. */ + len = odp_ufid_from_string(s, &ufid); + if (len) { + return len; } SCAN_SINGLE("skb_priority(", uint32_t, u32, OVS_KEY_ATTR_PRIORITY); diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 067f9000c..e9af63f05 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -3,7 +3,7 @@ AT_BANNER([dpif-netdev]) # Strips out uninteresting parts of flow output, as well as parts # that vary from one run to another (e.g., timing and bond actions). 
m4_define([STRIP_XOUT], [[sed ' - s/ufid:[0-9a-f]* // + s/ufid:[-0-9a-f]* // s/used:[0-9]*\.[0-9]*/used:0.0/ s/actions:.*/actions: / s/packets:[0-9]*/packets:0/ diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index fbc6fd9d8..bd1b21c8e 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -5460,7 +5460,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: ovs-appctl revalidator/wait AT_CHECK([ovs-appctl dpif/dump-flows -m br0], [0], [stdout]) -UFID=`sed -n 's/\(ufid:[[0-9a-fA-F]]*\).*/\1/p' stdout` +UFID=`sed -n 's/\(ufid:[[-0-9a-fA-F]]*\).*/\1/p' stdout` AT_CHECK([ovs-appctl dpctl/get-flow $UFID], [0], [dnl recirc_id(0),in_port(1),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop ]) diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index fd915ef28..a69719be0 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -36,7 +36,7 @@ m4_divert_pop([PREPARE_TESTS]) m4_define([STRIP_XIDS], [[sed 's/ (xid=0x[0-9a-fA-F]*)//']]) m4_define([STRIP_DURATION], [[sed 's/\bduration=[0-9.]*s/duration=?s/']]) m4_define([STRIP_USED], [[sed 's/used:[0-9]\.[0-9]*/used:0.0/']]) -m4_define([STRIP_UFID], [[sed 's/ufid:[0-9a-f]* //']]) +m4_define([STRIP_UFID], [[sed 's/ufid:[-0-9a-f]* //']]) m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m']) # _OVS_VSWITCHD_START([vswitchd-aux-args]) From bdd7ecf5bfc4a255872aa60057b3b96f72b47d8a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 29 May 2015 16:17:01 -0700 Subject: [PATCH 127/146] types: Rename and move ovs_u128_equal(). This function doesn't need to be exported in the public OVS headers, and it had an inconsistent name compared to uuid_equals(). Rename and move. 
Signed-off-by: Joe Stringer Acked-by: Ben Pfaff --- include/openvswitch/types.h | 7 ------- lib/dpif-netdev.c | 2 +- lib/dpif.c | 3 ++- lib/util.h | 7 +++++++ ofproto/ofproto-dpif-upcall.c | 2 +- tests/test-hash.c | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h index 2afb7b71f..629a3e5c6 100644 --- a/include/openvswitch/types.h +++ b/include/openvswitch/types.h @@ -88,13 +88,6 @@ typedef union { } u64; } ovs_u128; -/* Returns non-zero if the parameters have equal value. */ -static inline int -ovs_u128_equal(const ovs_u128 *a, const ovs_u128 *b) -{ - return (a->u64.hi == b->u64.hi) && (a->u64.lo == b->u64.lo); -} - /* A 64-bit value, in network byte order, that is only aligned on a 32-bit * boundary. */ typedef struct { diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7df95239d..5b82c8b63 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1770,7 +1770,7 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, if (ufidp) { CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp), &pmd->flow_table) { - if (ovs_u128_equal(&netdev_flow->ufid, ufidp)) { + if (ovs_u128_equals(&netdev_flow->ufid, ufidp)) { return netdev_flow; } } diff --git a/lib/dpif.c b/lib/dpif.c index 388ca734a..783a715fb 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -901,7 +901,8 @@ dpif_probe_feature(struct dpif *dpif, const char *name, error = dpif_flow_get(dpif, key->data, key->size, ufid, PMD_ID_NULL, &reply, &flow); if (!error - && (!ufid || (flow.ufid_present && ovs_u128_equal(ufid, &flow.ufid)))) { + && (!ufid || (flow.ufid_present + && ovs_u128_equals(ufid, &flow.ufid)))) { enable_feature = true; } diff --git a/lib/util.h b/lib/util.h index 78abfd388..d1e470a41 100644 --- a/lib/util.h +++ b/lib/util.h @@ -551,6 +551,13 @@ void bitwise_put(uint64_t value, uint64_t bitwise_get(const void *src, unsigned int src_len, unsigned int src_ofs, unsigned int n_bits); +/* Returns 
non-zero if the parameters have equal value. */ +static inline int +ovs_u128_equals(const ovs_u128 *a, const ovs_u128 *b) +{ + return (a->u64.hi == b->u64.hi) && (a->u64.lo == b->u64.lo); +} + void xsleep(unsigned int seconds); #ifdef _WIN32 diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 6c5770ad2..c39b5712e 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1302,7 +1302,7 @@ ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid) struct cmap *cmap = &udpif->ukeys[idx].cmap; CMAP_FOR_EACH_WITH_HASH (ukey, cmap_node, get_ufid_hash(ufid), cmap) { - if (ovs_u128_equal(&ukey->ufid, ufid)) { + if (ovs_u128_equals(&ukey->ufid, ufid)) { return ukey; } } diff --git a/tests/test-hash.c b/tests/test-hash.c index 83521567b..67a1f6c89 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -162,7 +162,7 @@ check_hash_bytes128(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), set_bit128(&in1, i, n_bits); hash(in0, sizeof(ovs_u128), 0, &out0); hash(&in1, sizeof(ovs_u128), 0, &out1); - if (!ovs_u128_equal(&out0, &out1)) { + if (!ovs_u128_equals(&out0, &out1)) { printf("%s hash not the same for non-64 aligned data " "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n", name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi); @@ -214,7 +214,7 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), set_bit128(in1, i, n_bits); hash(in0, sizeof(ovs_u128) * 16, 0, &out0); hash(in1, sizeof(ovs_u128) * 16, 0, &out1); - if (!ovs_u128_equal(&out0, &out1)) { + if (!ovs_u128_equals(&out0, &out1)) { printf("%s hash not the same for non-64 aligned data " "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n", name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi); From 40e7cf5607052d3b4fa09fd433fa630352c115b6 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 7 Jun 2015 09:48:14 -0700 Subject: [PATCH 128/146] configure: Stop avoiding -Wformat-zero-length. 
Debian likes to enable -Wformat-zero-length, even over our code trying to disable it. It isn't too hard to make our code warning-free against this option, so this commit both stops disabling it and fixes the warnings. The first fix is to change set_subprogram_name() to take a plain string instead of a format string, and to adjust its few callers. This fixes one warning since one of those callers passed in an empty string. The second fix is to remove a test for ovs_scan() against an empty string. I couldn't find a way to avoid a warning for this test, and it isn't too valuable in any case. This allows us to drop filtering for -Wformat from the Debian rules file, so this commit removes it. Signed-off-by: Ben Pfaff --- configure.ac | 3 +-- debian/rules | 7 ------- lib/ovs-thread.c | 6 ++++-- lib/util.c | 21 +++++---------------- lib/util.h | 4 ++-- tests/test-util.c | 3 +-- 6 files changed, 13 insertions(+), 31 deletions(-) diff --git a/configure.ac b/configure.ac index 068674ee7..be5cf086c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. +# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -144,7 +144,6 @@ OVS_ENABLE_OPTION([-Wextra]) OVS_ENABLE_OPTION([-Wno-sign-compare]) OVS_ENABLE_OPTION([-Wpointer-arith]) OVS_ENABLE_OPTION([-Wformat-security]) -OVS_ENABLE_OPTION([-Wno-format-zero-length]) OVS_ENABLE_OPTION([-Wswitch-enum]) OVS_ENABLE_OPTION([-Wunused-parameter]) OVS_ENABLE_OPTION([-Wbad-function-cast]) diff --git a/debian/rules b/debian/rules index fc6ce57eb..38ecd6227 100755 --- a/debian/rules +++ b/debian/rules @@ -35,13 +35,6 @@ endif buildflags := $(shell if dpkg-buildflags --export=configure >/dev/null 2>&1; \ then dpkg-buildflags --export=configure; fi) -# dpkg-buildflags tends to turn on -Wformat, which is admirable, but -# the -Wformat-zero-length subset of that option triggers a couple of -# false positives in Open vSwitch so turn it right back off again. -# (We do this in configure.ac also, but the Debian buildflags override -# those.) -buildflags := $(patsubst -Wformat,-Wformat -Wno-format-zero-length,$(buildflags)) - configure: configure-stamp configure-stamp: dh_testdir diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c index b2d05a6cb..88b92d1d7 100644 --- a/lib/ovs-thread.c +++ b/lib/ovs-thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2014 Nicira, Inc. + * Copyright (c) 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -332,7 +332,9 @@ ovsthread_wrapper(void *aux_) /* The order of the following calls is important, because * ovsrcu_quiesce_end() saves a copy of the thread name. */ - set_subprogram_name("%s%u", aux.name, id); + char *subprogram_name = xasprintf("%s%u", aux.name, id); + set_subprogram_name(subprogram_name); + free(subprogram_name); ovsrcu_quiesce_end(); return aux.start(aux.arg); diff --git a/lib/util.c b/lib/util.c index 947398507..a7e027976 100644 --- a/lib/util.c +++ b/lib/util.c @@ -500,24 +500,13 @@ get_subprogram_name(void) return name ? 
name : ""; } -/* Sets the formatted value of 'format' as the name of the currently running - * thread or process. (This appears in log messages and may also be visible in - * system process listings and debuggers.) */ +/* Sets 'subprogram_name' as the name of the currently running thread or + * process. (This appears in log messages and may also be visible in system + * process listings and debuggers.) */ void -set_subprogram_name(const char *format, ...) +set_subprogram_name(const char *subprogram_name) { - char *pname; - - if (format) { - va_list args; - - va_start(args, format); - pname = xvasprintf(format, args); - va_end(args); - } else { - pname = xstrdup(program_name); - } - + char *pname = xstrdup(subprogram_name ? subprogram_name : program_name); free(subprogram_name_set(pname)); #if HAVE_GLIBC_PTHREAD_SETNAME_NP diff --git a/lib/util.h b/lib/util.h index d1e470a41..59276e357 100644 --- a/lib/util.h +++ b/lib/util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -263,7 +263,7 @@ extern "C" { ovs_set_program_name(name, OVS_PACKAGE_VERSION) const char *get_subprogram_name(void); -void set_subprogram_name(const char *format, ...) OVS_PRINTF_FORMAT(1, 2); + void set_subprogram_name(const char *); void ovs_print_version(uint8_t min_ofp, uint8_t max_ofp); diff --git a/tests/test-util.c b/tests/test-util.c index 1d63d68d1..8eedaf09e 100644 --- a/tests/test-util.c +++ b/tests/test-util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -497,7 +497,6 @@ test_ovs_scan(struct ovs_cmdl_context *ctx OVS_UNUSED) long l, l2; int i, i2; - ovs_assert(ovs_scan("", "")); ovs_assert(ovs_scan("", " ")); ovs_assert(ovs_scan(" ", " ")); ovs_assert(ovs_scan(" ", " ")); From 40c9675a1e88804fcef9658ddacdf80dd82a1763 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 7 Jun 2015 09:48:15 -0700 Subject: [PATCH 129/146] debian: Remove obsolete manual setting of CFLAGS and warnings from rules. Setting CFLAGS by hand before invoking dpkg-buildflags is ineffective, because dpkg-buildflags overrides it. Reported-by: Andrey Korolyov Signed-off-by: Ben Pfaff --- debian/rules | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/debian/rules b/debian/rules index 38ecd6227..6d1ccec41 100755 --- a/debian/rules +++ b/debian/rules @@ -22,13 +22,6 @@ PARALLEL = endif MAKEFLAGS += $(PARALLEL) -CFLAGS += -g -ifneq (,$(filter noopt,$(DEB_BUILD_OPTIONS))) -CFLAGS += -O0 -else -CFLAGS += -O2 -endif - # Old versions of dpkg-buildflags do not understand --export=configure. # When dpkg-buildflags does not understand an option, it prints its full # --help output on stdout, so we have to avoid that here. @@ -45,7 +38,7 @@ configure-stamp: cd _debian && ( \ test -e Makefile || \ ../configure --prefix=/usr --localstatedir=/var --enable-ssl \ - --sysconfdir=/etc --host=$(DEB_HOST_GNU_TYPE) CFLAGS="$(CFLAGS)" \ + --sysconfdir=/etc --host=$(DEB_HOST_GNU_TYPE) \ $(buildflags) $(DATAPATH_CONFIGURE_OPTS)) touch configure-stamp From 4fcb208348e1f6fdc62cc0bcfdcf5b6570ae3f1a Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 5 Jun 2015 14:03:11 -0700 Subject: [PATCH 130/146] ofproto: Rename *_begin functions as *_start. 
Weirdest things can bother you at night when you try to sleep ;-) Now we have function triples such as add_flow_begin(), add_flow_finish(), and add_flow_revert(), where a modification is started in *_begin(), which can fail, and when successful can be either made permanent with *_finish(), or cancelled with *_revert(). Linguistically it should be either "begin/end" or "start/finish", not "begin/finish". "begin/end" has some C++ STL baggage, so let's go with "start/finish". IMO "revert" rhymes with it, too. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 029ff3757..f2e9557c6 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -278,7 +278,7 @@ static bool ofproto_group_exists(const struct ofproto *ofproto, OVS_EXCLUDED(ofproto->groups_rwlock); static enum ofperr add_group(struct ofproto *, struct ofputil_group_mod *); static void handle_openflow(struct ofconn *, const struct ofpbuf *); -static enum ofperr do_bundle_flow_mod_begin(struct ofproto *, +static enum ofperr do_bundle_flow_mod_start(struct ofproto *, struct ofputil_flow_mod *, struct ofp_bundle_entry *) OVS_REQUIRES(ofproto_mutex); @@ -4357,7 +4357,7 @@ set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, * * The caller retains ownership of 'fm->ofpacts'. */ static enum ofperr -add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, +add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule **rulep, bool *modify) OVS_REQUIRES(ofproto_mutex) { @@ -4469,7 +4469,7 @@ add_flow_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return 0; } -/* Revert the effects of add_flow_begin(). +/* Revert the effects of add_flow_start(). * 'new_rule' must be passed in as NULL, if no new rule was allocated and * inserted to the classifier. * Note: evictions cannot be reverted. 
*/ @@ -4706,7 +4706,7 @@ modify_flows__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } static enum ofperr -modify_flows_begin__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, +modify_flows_start__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { @@ -4718,7 +4718,7 @@ modify_flows_begin__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, || fm->new_cookie == OVS_BE64_MAX)) { bool modify; - error = add_flow_begin(ofproto, fm, &rules->rules[0], &modify); + error = add_flow_start(ofproto, fm, &rules->rules[0], &modify); if (!error) { ovs_assert(!modify); } @@ -4735,7 +4735,7 @@ modify_flows_begin__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, * if any. */ static enum ofperr -modify_flows_begin_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, +modify_flows_start_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { @@ -4750,7 +4750,7 @@ modify_flows_begin_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule_criteria_destroy(&criteria); if (!error) { - error = modify_flows_begin__(ofproto, fm, rules); + error = modify_flows_start__(ofproto, fm, rules); } if (error) { @@ -4788,7 +4788,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error * code on failure. */ static enum ofperr -modify_flow_begin_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, +modify_flow_start_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { @@ -4804,7 +4804,7 @@ modify_flow_begin_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, if (!error) { /* collect_rules_strict() can return max 1 rule. 
*/ - error = modify_flows_begin__(ofproto, fm, rules); + error = modify_flows_start__(ofproto, fm, rules); } if (error) { @@ -4865,7 +4865,7 @@ delete_flows__(const struct rule_collection *rules, /* Implements OFPFC_DELETE. */ static enum ofperr -delete_flows_begin_loose(struct ofproto *ofproto, +delete_flows_start_loose(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) @@ -4916,7 +4916,7 @@ delete_flows_finish(const struct ofputil_flow_mod *fm, /* Implements OFPFC_DELETE_STRICT. */ static enum ofperr -delete_flow_begin_strict(struct ofproto *ofproto, +delete_flow_start_strict(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) @@ -5077,7 +5077,7 @@ handle_flow_mod__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, enum ofperr error; ovs_mutex_lock(&ofproto_mutex); - error = do_bundle_flow_mod_begin(ofproto, fm, &be); + error = do_bundle_flow_mod_start(ofproto, fm, &be); if (!error) { do_bundle_flow_mod_finish(ofproto, fm, req, &be); } @@ -6443,25 +6443,25 @@ handle_table_mod(struct ofconn *ofconn, const struct ofp_header *oh) } static enum ofperr -do_bundle_flow_mod_begin(struct ofproto *ofproto, struct ofputil_flow_mod *fm, +do_bundle_flow_mod_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct ofp_bundle_entry *be) OVS_REQUIRES(ofproto_mutex) { switch (fm->command) { case OFPFC_ADD: - return add_flow_begin(ofproto, fm, &be->rule, &be->modify); + return add_flow_start(ofproto, fm, &be->rule, &be->modify); case OFPFC_MODIFY: - return modify_flows_begin_loose(ofproto, fm, &be->rules); + return modify_flows_start_loose(ofproto, fm, &be->rules); case OFPFC_MODIFY_STRICT: - return modify_flow_begin_strict(ofproto, fm, &be->rules); + return modify_flow_start_strict(ofproto, fm, &be->rules); case OFPFC_DELETE: - return delete_flows_begin_loose(ofproto, fm, &be->rules); + return delete_flows_start_loose(ofproto, fm, 
&be->rules); case OFPFC_DELETE_STRICT: - return delete_flow_begin_strict(ofproto, fm, &be->rules); + return delete_flow_start_strict(ofproto, fm, &be->rules); } return OFPERR_OFPFMFC_BAD_COMMAND; @@ -6561,7 +6561,7 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) /* Not supported yet. */ error = OFPERR_OFPBFC_MSG_FAILED; } else if (be->type == OFPTYPE_FLOW_MOD) { - error = do_bundle_flow_mod_begin(ofproto, &be->fm, be); + error = do_bundle_flow_mod_start(ofproto, &be->fm, be); } else { OVS_NOT_REACHED(); } From db5076eee46e5ad4d67dc02b902c9b4aaeb190a4 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 5 Jun 2015 14:03:12 -0700 Subject: [PATCH 131/146] ovs-ofctl: Add bundle support and unit testing. All existing ovs-ofctl flow mod commands now take an optional '--bundle' argument, which executes the flow mods as a single transaction. OpenFlow 1.4+ is implicitly assumed when '--bundle' is specified. ovs-ofctl 'add-flow' and 'add-flows' commands now accept flow specifications that start with an optional 'add', 'modify', 'delete', 'modify_strict', or 'delete_strict' keyword, so that arbitrary flow table modifications may be specified. For backwards compatibility, a missing keyword is treated as an 'add'. With the new '--bundle' option all the modifications are executed as a single transaction using an OpenFlow 1.4 bundle. OpenFlow 1.4 requires bundles to support at least flow and port mods. This implementation does not yet support port mods in bundles. Another restriction is that the atomic transactions are not yet supported. 
Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- NEWS | 19 ++- include/openvswitch/vconn.h | 3 + lib/ofp-parse.c | 40 +++++- lib/ofp-parse.h | 6 +- lib/ofp-util.c | 30 +++++ lib/ofp-util.h | 2 + lib/ofp-version-opt.c | 7 + lib/ofp-version-opt.h | 1 + lib/vconn.c | 253 ++++++++++++++++++++++++++++++++---- tests/ofproto-macros.at | 10 ++ tests/ofproto.at | 244 ++++++++++++++++++++++++++++++++++ tests/ovs-ofctl.at | 107 +++++++++++++++ utilities/ovs-ofctl.8.in | 58 +++++++-- utilities/ovs-ofctl.c | 86 ++++++++++-- 14 files changed, 812 insertions(+), 54 deletions(-) diff --git a/NEWS b/NEWS index a4806077d..5bea23798 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ Post-v2.3.0 --------------------- - - Added support for SFQ, FQ_CoDel and CoDel qdiscs. + - Added support for SFQ, FQ_CoDel and CoDel qdiscs. - Add bash command-line completion support for ovs-vsctl Please check utilities/ovs-command-compgen.INSTALL.md for how to use. - The MAC learning feature now includes per-port fairness to mitigate @@ -27,6 +27,11 @@ Post-v2.3.0 commands are now redundant and will be removed in a future release. See ovs-vswitchd(8) for details. - OpenFlow: + * OpenFlow 1.4 bundles are now supported, but for flow mod + messages only. 'atomic' bundles are not yet supported, and + 'ordered' bundles are trivially supported, as all bundled + messages are executed in the order they were added to the + bundle regardless of the presence of the 'ordered' flag. * IPv6 flow label and neighbor discovery fields are now modifiable. * OpenFlow 1.5 extended registers are now supported. * The OpenFlow 1.5 actset_output field is now supported. @@ -41,6 +46,18 @@ Post-v2.3.0 * A new Netronome extension to OpenFlow 1.5+ allows control over the fields hashed for OpenFlow select groups. See "selection_method" and related options in ovs-ofctl(8) for details. 
+ - ovs-ofctl has a new '--bundle' option that makes the flow mod commands + ('add-flow', 'add-flows', 'mod-flows', 'del-flows', and 'replace-flows') + use an OpenFlow 1.4 bundle to operate the modifications as a single + transaction. If any of the flow mods in a transaction fail, none of + them are executed. + - ovs-ofctl 'add-flow' and 'add-flows' commands now accept arbitrary flow + mods as an input by allowing the flow specification to start with an + explicit 'add', 'modify', 'modify_strict', 'delete', or 'delete_strict' + keyword. A missing keyword is treated as 'add', so this is fully + backwards compatible. With the new '--bundle' option all the flow mods + are executed as a single transaction using the new OpenFlow 1.4 bundles + support. - ovs-pki: Changed message digest algorithm from MD5 to SHA-1 because MD5 is no longer secure and some operating systems have started to disable it in OpenSSL. diff --git a/include/openvswitch/vconn.h b/include/openvswitch/vconn.h index 3b157e1b0..f8b66553c 100644 --- a/include/openvswitch/vconn.h +++ b/include/openvswitch/vconn.h @@ -55,6 +55,9 @@ int vconn_transact(struct vconn *, struct ofpbuf *, struct ofpbuf **); int vconn_transact_noreply(struct vconn *, struct ofpbuf *, struct ofpbuf **); int vconn_transact_multiple_noreply(struct vconn *, struct ovs_list *requests, struct ofpbuf **replyp); +int vconn_bundle_transact(struct vconn *, struct ovs_list *requests, + uint16_t bundle_flags, + void (*error_reporter)(const struct ofp_header *)); void vconn_run(struct vconn *); void vconn_run_wait(struct vconn *); diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 6125f27b6..210feed1e 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -258,6 +258,29 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, *usable_protocols = OFPUTIL_P_ANY; + if (command == -2) { + size_t len; + + string += strspn(string, " \t\r\n"); /* Skip white space. 
*/ + len = strcspn(string, ", \t\r\n"); /* Get length of the first token. */ + + if (!strncmp(string, "add", len)) { + command = OFPFC_ADD; + } else if (!strncmp(string, "delete", len)) { + command = OFPFC_DELETE; + } else if (!strncmp(string, "delete_strict", len)) { + command = OFPFC_DELETE_STRICT; + } else if (!strncmp(string, "modify", len)) { + command = OFPFC_MODIFY; + } else if (!strncmp(string, "modify_strict", len)) { + command = OFPFC_MODIFY_STRICT; + } else { + len = 0; + command = OFPFC_ADD; + } + string += len; + } + switch (command) { case -1: fields = F_OUT_PORT; @@ -486,6 +509,10 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, * constant for 'command'. To parse syntax for an OFPST_FLOW or * OFPST_AGGREGATE (or NXST_FLOW or NXST_AGGREGATE), use -1 for 'command'. * + * If 'command' is given as -2, 'str_' may begin with a command name ("add", + * "modify", "delete", "modify_strict", or "delete_strict"). A missing command + * name is treated as "add". + * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * OVS_WARN_UNUSED_RESULT @@ -818,14 +845,19 @@ parse_flow_monitor_request(struct ofputil_flow_monitor_request *fmr, /* Parses 'string' as an OFPT_FLOW_MOD or NXT_FLOW_MOD with command 'command' * (one of OFPFC_*) into 'fm'. * + * If 'command' is given as -2, 'string' may begin with a command name ("add", + * "modify", "delete", "modify_strict", or "delete_strict"). A missing command + * name is treated as "add". + * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ char * OVS_WARN_UNUSED_RESULT parse_ofp_flow_mod_str(struct ofputil_flow_mod *fm, const char *string, - uint16_t command, + int command, enum ofputil_protocol *usable_protocols) { char *error = parse_ofp_str(fm, command, string, usable_protocols); + if (!error) { /* Normalize a copy of the match. This ensures that non-normalized * flows get logged but doesn't affect what gets sent to the switch, so @@ -883,10 +915,14 @@ parse_ofp_table_mod(struct ofputil_table_mod *tm, const char *table_id, * type (one of OFPFC_*). Stores each flow_mod in '*fm', an array allocated * on the caller's behalf, and the number of flow_mods in '*n_fms'. * + * If 'command' is given as -2, each line may start with a command name + * ("add", "modify", "delete", "modify_strict", or "delete_strict"). A missing + * command name is treated as "add". + * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * OVS_WARN_UNUSED_RESULT -parse_ofp_flow_mod_file(const char *file_name, uint16_t command, +parse_ofp_flow_mod_file(const char *file_name, int command, struct ofputil_flow_mod **fms, size_t *n_fms, enum ofputil_protocol *usable_protocols) { diff --git a/lib/ofp-parse.h b/lib/ofp-parse.h index db30f4323..f1126034b 100644 --- a/lib/ofp-parse.h +++ b/lib/ofp-parse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,7 +42,7 @@ char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_, OVS_WARN_UNUSED_RESULT; char *parse_ofp_flow_mod_str(struct ofputil_flow_mod *, const char *string, - uint16_t command, + int command, enum ofputil_protocol *usable_protocols) OVS_WARN_UNUSED_RESULT; @@ -51,7 +51,7 @@ char *parse_ofp_table_mod(struct ofputil_table_mod *, enum ofputil_protocol *usable_protocols) OVS_WARN_UNUSED_RESULT; -char *parse_ofp_flow_mod_file(const char *file_name, uint16_t command, +char *parse_ofp_flow_mod_file(const char *file_name, int command, struct ofputil_flow_mod **fms, size_t *n_fms, enum ofputil_protocol *usable_protocols) OVS_WARN_UNUSED_RESULT; diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 9004b8d30..89359c145 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -8695,6 +8695,36 @@ ofputil_decode_bundle_ctrl(const struct ofp_header *oh, return 0; } +struct ofpbuf * +ofputil_encode_bundle_ctrl_request(enum ofp_version ofp_version, + struct ofputil_bundle_ctrl_msg *bc) +{ + struct ofpbuf *request; + struct ofp14_bundle_ctrl_msg *m; + + switch (ofp_version) { + case OFP10_VERSION: + case OFP11_VERSION: + case OFP12_VERSION: + case OFP13_VERSION: + ovs_fatal(0, "bundles need OpenFlow 1.4 or later " + "(\'-O OpenFlow14\')"); + case OFP14_VERSION: + case OFP15_VERSION: + request = ofpraw_alloc(OFPRAW_OFPT14_BUNDLE_CONTROL, ofp_version, 0); + m = ofpbuf_put_zeros(request, sizeof *m); + + m->bundle_id = htonl(bc->bundle_id); + m->type = htons(bc->type); + m->flags = htons(bc->flags); + break; + default: + OVS_NOT_REACHED(); + } + + return request; +} + struct ofpbuf * ofputil_encode_bundle_ctrl_reply(const struct ofp_header *oh, struct ofputil_bundle_ctrl_msg *msg) diff --git a/lib/ofp-util.h b/lib/ofp-util.h index 549f1183a..596c2e2a0 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -1114,6 +1114,8 @@ enum ofptype; enum ofperr ofputil_decode_bundle_ctrl(const struct ofp_header *, struct ofputil_bundle_ctrl_msg *); +struct ofpbuf 
*ofputil_encode_bundle_ctrl_request(enum ofp_version, + struct ofputil_bundle_ctrl_msg *); struct ofpbuf *ofputil_encode_bundle_ctrl_reply(const struct ofp_header *, struct ofputil_bundle_ctrl_msg *); diff --git a/lib/ofp-version-opt.c b/lib/ofp-version-opt.c index 46fb45a4d..1cf57e18f 100644 --- a/lib/ofp-version-opt.c +++ b/lib/ofp-version-opt.c @@ -26,6 +26,13 @@ mask_allowed_ofp_versions(uint32_t bitmap) allowed_versions &= bitmap; } +void +add_allowed_ofp_versions(uint32_t bitmap) +{ + assert_single_threaded(); + allowed_versions |= bitmap; +} + void ofp_version_usage(void) { diff --git a/lib/ofp-version-opt.h b/lib/ofp-version-opt.h index 6bf5eed8a..82b4ccce7 100644 --- a/lib/ofp-version-opt.h +++ b/lib/ofp-version-opt.h @@ -21,6 +21,7 @@ uint32_t get_allowed_ofp_versions(void); void set_allowed_ofp_versions(const char *string); void mask_allowed_ofp_versions(uint32_t); +void add_allowed_ofp_versions(uint32_t); void ofp_version_usage(void); #endif diff --git a/lib/vconn.c b/lib/vconn.c index c033b48d8..d83594332 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -744,6 +744,41 @@ vconn_recv_block(struct vconn *vconn, struct ofpbuf **msgp) return retval; } +static int +vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp, + void (*error_reporter)(const struct ofp_header *)) +{ + for (;;) { + ovs_be32 recv_xid; + struct ofpbuf *reply; + const struct ofp_header *oh; + enum ofptype type; + int error; + + error = vconn_recv_block(vconn, &reply); + if (error) { + *replyp = NULL; + return error; + } + oh = reply->data; + recv_xid = oh->xid; + if (xid == recv_xid) { + *replyp = reply; + return 0; + } + + error = ofptype_decode(&type, oh); + if (!error && type == OFPTYPE_ERROR && error_reporter) { + error_reporter(oh); + } else { + VLOG_DBG_RL(&bad_ofmsg_rl, "%s: received reply with xid %08"PRIx32 + " != expected %08"PRIx32, + vconn->name, ntohl(recv_xid), ntohl(xid)); + } + ofpbuf_delete(reply); + } +} + /* Waits until a message with a transaction ID matching 'xid' is received on * 'vconn'. Returns 0 if successful, in which case the reply is stored in * '*replyp' for the caller to examine and free. 
Otherwise returns a positive @@ -753,27 +788,24 @@ vconn_recv_block(struct vconn *vconn, struct ofpbuf **msgp) int vconn_recv_xid(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp) { - for (;;) { - ovs_be32 recv_xid; - struct ofpbuf *reply; - int error; + return vconn_recv_xid__(vconn, xid, replyp, NULL); +} - error = vconn_recv_block(vconn, &reply); - if (error) { - *replyp = NULL; - return error; - } - recv_xid = ((struct ofp_header *) reply->data)->xid; - if (xid == recv_xid) { - *replyp = reply; - return 0; - } +static int +vconn_transact__(struct vconn *vconn, struct ofpbuf *request, + struct ofpbuf **replyp, + void (*error_reporter)(const struct ofp_header *)) +{ + ovs_be32 send_xid = ((struct ofp_header *) request->data)->xid; + int error; - VLOG_DBG_RL(&bad_ofmsg_rl, "%s: received reply with xid %08"PRIx32 - " != expected %08"PRIx32, - vconn->name, ntohl(recv_xid), ntohl(xid)); - ofpbuf_delete(reply); + *replyp = NULL; + error = vconn_send_block(vconn, request); + if (error) { + ofpbuf_delete(request); } + return error ? error : vconn_recv_xid__(vconn, send_xid, replyp, + error_reporter); } /* Sends 'request' to 'vconn' and blocks until it receives a reply with a @@ -790,15 +822,7 @@ int vconn_transact(struct vconn *vconn, struct ofpbuf *request, struct ofpbuf **replyp) { - ovs_be32 send_xid = ((struct ofp_header *) request->data)->xid; - int error; - - *replyp = NULL; - error = vconn_send_block(vconn, request); - if (error) { - ofpbuf_delete(request); - } - return error ? 
error : vconn_recv_xid(vconn, send_xid, replyp); + return vconn_transact__(vconn, request, replyp, NULL); } /* Sends 'request' followed by a barrier request to 'vconn', then blocks until @@ -897,6 +921,179 @@ vconn_transact_multiple_noreply(struct vconn *vconn, struct ovs_list *requests, return 0; } +static enum ofperr +vconn_bundle_reply_validate(struct ofpbuf *reply, + struct ofputil_bundle_ctrl_msg *request, + void (*error_reporter)(const struct ofp_header *)) +{ + const struct ofp_header *oh; + enum ofptype type; + enum ofperr error; + struct ofputil_bundle_ctrl_msg rbc; + + oh = reply->data; + error = ofptype_decode(&type, oh); + if (error) { + return error; + } + + if (type == OFPTYPE_ERROR) { + error_reporter(oh); + return ofperr_decode_msg(oh, NULL); + } + if (type != OFPTYPE_BUNDLE_CONTROL) { + return OFPERR_OFPBRC_BAD_TYPE; + } + + error = ofputil_decode_bundle_ctrl(oh, &rbc); + if (error) { + return error; + } + + if (rbc.bundle_id != request->bundle_id) { + return OFPERR_OFPBFC_BAD_ID; + } + + if (rbc.type != request->type + 1) { + return OFPERR_OFPBFC_BAD_TYPE; + } + + return 0; +} + +/* Sends bundle control message 'bc' of 'type' via 'vconn', and waits for + * either an error or the corresponding bundle control message response. + * + * 'error_reporter' is called for any error responses received, which may also + * concern OpenFlow messages sent earlier than this bundle control message. + * + * Returns errno value, or 0 when successful.
*/ +static int +vconn_bundle_control_transact(struct vconn *vconn, + struct ofputil_bundle_ctrl_msg *bc, + uint16_t type, + void (*error_reporter)(const struct ofp_header *)) +{ + struct ofpbuf *request, *reply; + int error; + enum ofperr ofperr; + + bc->type = type; + request = ofputil_encode_bundle_ctrl_request(vconn->version, bc); + ofpmsg_update_length(request); + error = vconn_transact__(vconn, request, &reply, error_reporter); + if (error) { + return error; + } + + ofperr = vconn_bundle_reply_validate(reply, bc, error_reporter); + if (ofperr) { + VLOG_WARN_RL(&bad_ofmsg_rl, "Bundle %s failed (%s).", + type == OFPBCT_OPEN_REQUEST ? "open" + : type == OFPBCT_CLOSE_REQUEST ? "close" + : type == OFPBCT_COMMIT_REQUEST ? "commit" + : type == OFPBCT_DISCARD_REQUEST ? "discard" + : "control message", + ofperr_to_string(ofperr)); + } + ofpbuf_delete(reply); + + return ofperr ? EPROTO : 0; +} + +/* Checks if error responses can be received on 'vconn'. */ +static void +vconn_recv_error(struct vconn *vconn, + void (*error_reporter)(const struct ofp_header *)) +{ + int error; + + do { + struct ofpbuf *reply; + + error = vconn_recv(vconn, &reply); + if (!error) { + const struct ofp_header *oh; + enum ofptype type; + enum ofperr ofperr; + + oh = reply->data; + ofperr = ofptype_decode(&type, oh); + if (!ofperr && type == OFPTYPE_ERROR) { + error_reporter(oh); + } else { + VLOG_DBG_RL(&bad_ofmsg_rl, + "%s: received unexpected reply with xid %08"PRIx32, + vconn->name, ntohl(oh->xid)); + } + ofpbuf_delete(reply); + } + } while (!error); +} + +static int +vconn_bundle_add_msg(struct vconn *vconn, struct ofputil_bundle_ctrl_msg *bc, + struct ofpbuf *msg, + void (*error_reporter)(const struct ofp_header *)) +{ + struct ofputil_bundle_add_msg bam; + struct ofpbuf *request; + int error; + + bam.bundle_id = bc->bundle_id; + bam.flags = bc->flags; + bam.msg = msg->data; + + request = ofputil_encode_bundle_add(vconn->version, &bam); + ofpmsg_update_length(request); + + error = 
vconn_send_block(vconn, request); + if (!error) { + /* Check for an error return, so that the socket buffer does not become + * full of errors. */ + vconn_recv_error(vconn, error_reporter); + } + return error; +} + +int +vconn_bundle_transact(struct vconn *vconn, struct ovs_list *requests, + uint16_t flags, + void (*error_reporter)(const struct ofp_header *)) +{ + struct ofputil_bundle_ctrl_msg bc; + struct ofpbuf *request; + int error; + + memset(&bc, 0, sizeof bc); + bc.flags = flags; + error = vconn_bundle_control_transact(vconn, &bc, OFPBCT_OPEN_REQUEST, + error_reporter); + if (error) { + return error; + } + + LIST_FOR_EACH (request, list_node, requests) { + error = vconn_bundle_add_msg(vconn, &bc, request, error_reporter); + if (error) { + break; + } + } + + if (!error) { + error = vconn_bundle_control_transact(vconn, &bc, + OFPBCT_COMMIT_REQUEST, + error_reporter); + } else { + /* Do not overwrite the error code from vconn_bundle_add_msg(). + * Any error in discard should be either reported or logged, so it + * should not get lost. */ + vconn_bundle_control_transact(vconn, &bc, OFPBCT_DISCARD_REQUEST, + error_reporter); + } + return error; +} + void vconn_wait(struct vconn *vconn, enum vconn_wait_type wait) { diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index a69719be0..83b2480ea 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -15,6 +15,16 @@ s/ hard_age=[0-9]*,// ' } +# Filter (multiline) vconn debug messages from ovs-vswitchd.log. 
+# Use with ofctl_strip() +print_vconn_debug () { awk -F\| < ovs-vswitchd.log ' +BEGIN { prt=0 } +/\|vconn\|DBG\|/ { sub(/[ \t]*$/, ""); print $3 "|" $4 "|" $5; prt=1; next } +$4 != "" { prt=0; next } +prt==1 { sub(/[ \t]*$/, ""); print $0 } +' +} + # parse_listening_port [SERVER] # # Parses the TCP or SSL port on which a server is listening from the diff --git a/tests/ofproto.at b/tests/ofproto.at index be1b298a6..1fa5b2d80 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -3450,3 +3450,247 @@ OFPT_BARRIER_REPLY (OF1.4): OVS_VSWITCHD_STOP AT_CLEANUP + + +AT_SETUP([ofproto - bundle with multiple flow mods (OpenFlow 1.4)]) +AT_KEYWORDS([monitor]) +OVS_VSWITCHD_START + +AT_CHECK([ovs-appctl vlog/set vconn:dbg]) + +AT_CHECK([ovs-ofctl del-flows br0]) + +AT_DATA([flows.txt], [dnl +add idle_timeout=50 in_port=2 dl_src=00:66:77:88:99:aa actions=1 +add idle_timeout=60 in_port=2 dl_src=00:77:88:99:aa:bb actions=2 +add idle_timeout=70 in_port=2 dl_src=00:88:99:aa:bb:cc actions=3 +add idle_timeout=50 in_port=2 dl_src=00:66:77:88:99:aa actions=4 +delete +add idle_timeout=50 in_port=2 dl_src=00:66:77:88:99:aa actions=5 +add idle_timeout=60 in_port=2 dl_src=00:77:88:99:aa:bb actions=6 +add idle_timeout=70 in_port=2 dl_src=00:88:99:aa:bb:cc actions=7 +delete in_port=2 dl_src=00:88:99:aa:bb:cc +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl + idle_timeout=50, in_port=2,dl_src=00:66:77:88:99:aa actions=output:5 + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:6 +NXST_FLOW reply: +]) + +AT_DATA([flows.txt], [dnl +modify actions=drop +modify_strict in_port=2 dl_src=00:77:88:99:aa:bb actions=7 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl + idle_timeout=50, in_port=2,dl_src=00:66:77:88:99:aa actions=drop + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:7 +NXST_FLOW 
reply: +]) + +# Adding an existing flow acts as a modify, and delete_strict also works. +AT_DATA([flows.txt], [dnl +add idle_timeout=60 in_port=2 dl_src=00:77:88:99:aa:bb actions=8 +delete_strict in_port=2 dl_src=00:66:77:88:99:aa +add in_port=2 dl_src=00:66:77:88:99:aa actions=drop +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:8 + in_port=2,dl_src=00:66:77:88:99:aa actions=drop +NXST_FLOW reply: +]) + +dnl Check logs for OpenFlow trace +# Prevent race. +OVS_WAIT_UNTIL([test `grep -- "|vconn|DBG|unix: sent (Success): NXST_FLOW reply" ovs-vswitchd.log | wc -l` -ge 3]) +AT_CHECK([print_vconn_debug | ofctl_strip], [0], [dnl +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO: + version bitmap: 0x01 +vconn|DBG|unix: negotiated OpenFlow version 0x01 (we support version 0x06 and earlier, peer supports version 0x01) +vconn|DBG|unix: received: OFPT_FLOW_MOD: DEL actions=drop +vconn|DBG|unix: received: OFPT_BARRIER_REQUEST: +vconn|DBG|unix: sent (Success): OFPT_BARRIER_REPLY: +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x01, 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REPLY flags=0 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:1 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 
flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:2 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:88:99:aa:bb:cc idle:70 actions=output:3 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:4 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): DEL table:255 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:5 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:6 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:88:99:aa:bb:cc idle:70 actions=output:7 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): DEL table:255 in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REPLY flags=0 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO: + version bitmap: 0x01 +vconn|DBG|unix: negotiated OpenFlow version 0x01 (we support version 0x06 and earlier, peer supports version 0x01) +vconn|DBG|unix: received: NXT_SET_FLOW_FORMAT: format=nxm +vconn|DBG|unix: received: OFPT_BARRIER_REQUEST: +vconn|DBG|unix: sent (Success): OFPT_BARRIER_REPLY: +vconn|DBG|unix: received: NXST_FLOW request: 
+vconn|DBG|unix: sent (Success): NXST_FLOW reply: + idle_timeout=50, in_port=2,dl_src=00:66:77:88:99:aa actions=output:5 + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:6 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x01, 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REPLY flags=0 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): MOD actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): MOD_STRICT in_port=2,dl_src=00:77:88:99:aa:bb actions=output:7 +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REPLY flags=0 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO: + version bitmap: 0x01 +vconn|DBG|unix: negotiated OpenFlow version 0x01 (we support version 0x06 and earlier, peer supports version 0x01) +vconn|DBG|unix: received: NXT_SET_FLOW_FORMAT: format=nxm +vconn|DBG|unix: received: OFPT_BARRIER_REQUEST: +vconn|DBG|unix: sent (Success): OFPT_BARRIER_REPLY: +vconn|DBG|unix: received: NXST_FLOW request: +vconn|DBG|unix: sent (Success): NXST_FLOW reply: + idle_timeout=50, in_port=2,dl_src=00:66:77:88:99:aa actions=drop + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:7 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 
+vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x01, 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REPLY flags=0 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:8 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): DEL_STRICT table:255 in_port=2,dl_src=00:66:77:88:99:aa actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REPLY flags=0 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO: + version bitmap: 0x01 +vconn|DBG|unix: negotiated OpenFlow version 0x01 (we support version 0x06 and earlier, peer supports version 0x01) +vconn|DBG|unix: received: NXT_SET_FLOW_FORMAT: format=nxm +vconn|DBG|unix: received: OFPT_BARRIER_REQUEST: +vconn|DBG|unix: sent (Success): OFPT_BARRIER_REPLY: +vconn|DBG|unix: received: NXST_FLOW request: +vconn|DBG|unix: sent (Success): NXST_FLOW reply: + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:8 + in_port=2,dl_src=00:66:77:88:99:aa actions=drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + + +AT_SETUP([ofproto - failing bundle commit (OpenFlow 1.4)]) +AT_KEYWORDS([monitor]) +OVS_VSWITCHD_START + +AT_CHECK([ovs-ofctl del-flows br0]) + +ovs-ofctl add-flows 
br0 - <&1 | sed '/|WARN|/d +s/unix:.*br0\.mgmt/unix:br0.mgmt/'], [0], [dnl +OFPT_ERROR (OF1.4) (xid=0xb): OFPBRC_EPERM +OFPT_FLOW_MOD (OF1.4) (xid=0xb): ADD table:254 actions=drop +OFPT_ERROR (OF1.4) (xid=0xd): OFPBFC_MSG_FAILED +OFPT_BUNDLE_CONTROL (OF1.4) (xid=0xd): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +ovs-ofctl: talking to unix:br0.mgmt (Protocol error) +]) + +AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl + idle_timeout=50, in_port=2,dl_src=00:66:77:88:99:aa actions=output:11 + idle_timeout=60, in_port=2,dl_src=00:77:88:99:aa:bb actions=output:22 + idle_timeout=70, in_port=2,dl_src=00:88:99:aa:bb:cc actions=output:33 +NXST_FLOW reply: +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 1e128278d..b7db9bb78 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -2813,3 +2813,110 @@ AT_CHECK([ovs-ofctl -O OpenFlow14 dump-flows br0 | ofctl_strip | sed '/OFPST_FLO OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ovs-ofctl replace-flows with --bundle]) +OVS_VSWITCHD_START + +AT_CHECK([ovs-appctl vlog/set vconn:dbg]) + +dnl Add flows to br0 with importance via OF1.4+, using an OF1.4+ bundle. For more details refer "ovs-ofctl rule with importance" test case. +for i in 1 2 3 4 5 6 7 8; do echo "dl_vlan=$i,importance=$i,actions=drop"; done > add-flows.txt +AT_CHECK([ovs-ofctl --bundle add-flows br0 add-flows.txt]) + +dnl Replace some flows in the bridge. +for i in 1 3 5 7; do echo "dl_vlan=$i,importance=`expr $i + 10`,actions=drop"; done > replace-flows.txt +AT_CHECK([ovs-ofctl --bundle replace-flows br0 replace-flows.txt]) + +dnl Dump them and compare the dump flows output against the expected output. 
+for i in 1 2 3 4 5 6 7 8; do if [[ `expr $i % 2` -eq 1 ]]; then importance=`expr $i + 10`; else importance=$i; fi; echo " importance=$importance, dl_vlan=$i actions=drop"; done | sort > expout +AT_CHECK([ovs-ofctl -O OpenFlow14 dump-flows br0 | ofctl_strip | sed '/OFPST_FLOW/d' | sort], + [0], [expout]) + +dnl Check logs for OpenFlow trace +# Prevent race. +OVS_WAIT_UNTIL([test `grep -- "|vconn|DBG|unix: sent (Success): OFPST_FLOW reply" ovs-vswitchd.log | wc -l` -ge 2]) +# AT_CHECK([sed -n "s/^.*\(|vconn|DBG|.*xid=.*:\).*$/\1/p" ovs-vswitchd.log], [0], [dnl +AT_CHECK([print_vconn_debug | ofctl_strip], [0], [dnl +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x01, 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REPLY flags=0 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=1 importance:1 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=2 importance:2 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=3 importance:3 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=4 importance:4 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=5 importance:5 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): 
ADD dl_vlan=6 importance:6 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=7 importance:7 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=8 importance:8 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REPLY flags=0 +vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x01, 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) +vconn|DBG|unix: received: OFPST_FLOW request (OF1.4): +vconn|DBG|unix: sent (Success): OFPST_FLOW reply (OF1.4): +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=OPEN_REPLY flags=0 +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=1 importance:11 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=3 importance:13 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=5 importance:15 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): + bundle_id=0 flags=ordered +OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=7 importance:17 actions=drop +vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REQUEST flags=ordered +vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): + bundle_id=0 type=COMMIT_REPLY flags=0 
+vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): + version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 +vconn|DBG|unix: received: OFPT_HELLO (OF1.4): + version bitmap: 0x05 +vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports version 0x05) +vconn|DBG|unix: received: OFPST_FLOW request (OF1.4): +vconn|DBG|unix: sent (Success): OFPST_FLOW reply (OF1.4): + importance=11, dl_vlan=1 actions=drop + importance=2, dl_vlan=2 actions=drop + importance=13, dl_vlan=3 actions=drop + importance=4, dl_vlan=4 actions=drop + importance=15, dl_vlan=5 actions=drop + importance=6, dl_vlan=6 actions=drop + importance=17, dl_vlan=7 actions=drop + importance=8, dl_vlan=8 actions=drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index c667aa4b5..2c6a07332 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -296,29 +296,39 @@ Print meter features. . These commands manage the flow table in an OpenFlow switch. In each case, \fIflow\fR specifies a flow entry in the format described in -\fBFlow Syntax\fR, below, and \fIfile\fR is a text file that contains -zero or more flows in the same syntax, one per line. +\fBFlow Syntax\fR, below, \fIfile\fR is a text file that contains zero +or more flows in the same syntax, one per line, and the optional +\fB\-\-bundle\fR option operates the command as a single transaction, +see option \fB\-\-bundle\fR, below. . -.IP "\fBadd\-flow \fIswitch flow\fR" -.IQ "\fBadd\-flow \fIswitch \fB\- < \fIfile\fR" -.IQ "\fBadd\-flows \fIswitch file\fR" +.IP "[\fB\-\-bundle\fR] \fBadd\-flow \fIswitch flow\fR" +.IQ "[\fB\-\-bundle\fR] \fBadd\-flow \fIswitch \fB\- < \fIfile\fR" +.IQ "[\fB\-\-bundle\fR] \fBadd\-flows \fIswitch file\fR" Add each flow entry to \fIswitch\fR's tables. . 
-.IP "[\fB\-\-strict\fR] \fBmod\-flows \fIswitch flow\fR" -.IQ "[\fB\-\-strict\fR] \fBmod\-flows \fIswitch \fB\- < \fIfile\fR" +Each flow specification (e.g., each line in \fIfile\fR) may start with +\fBadd\fR, \fBmodify\fR, \fBdelete\fR, \fBmodify_strict\fR, or +\fBdelete_strict\fR keyword to specify whether a flow is to be added, +modified, or deleted, and whether the modify or delete is strict or +not. For backwards compatibility a flow specification without one of +these keywords is treated as a flow add. All flow mods are executed +in the order specified. +. +.IP "[\fB\-\-bundle\fR] [\fB\-\-strict\fR] \fBmod\-flows \fIswitch flow\fR" +.IQ "[\fB\-\-bundle\fR] [\fB\-\-strict\fR] \fBmod\-flows \fIswitch \fB\- < \fIfile\fR" Modify the actions in entries from \fIswitch\fR's tables that match the specified flows. With \fB\-\-strict\fR, wildcards are not treated as active for matching purposes. . -.IP "\fBdel\-flows \fIswitch\fR" -.IQ "[\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fR[\fIflow\fR]" -.IQ "[\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fB\- < \fIfile\fR" +.IP "[\fB\-\-bundle\fR] \fBdel\-flows \fIswitch\fR" +.IQ "[\fB\-\-bundle\fR] [\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fR[\fIflow\fR]" +.IQ "[\fB\-\-bundle\fR] [\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fB\- < \fIfile\fR" Deletes entries from \fIswitch\fR's flow table. With only a \fIswitch\fR argument, deletes all flows. Otherwise, deletes flow entries that match the specified flows. With \fB\-\-strict\fR, wildcards are not treated as active for matching purposes. . -.IP "[\fB\-\-readd\fR] \fBreplace\-flows \fIswitch file\fR" +.IP "[\fB\-\-bundle\fR] [\fB\-\-readd\fR] \fBreplace\-flows \fIswitch file\fR" Reads flow entries from \fIfile\fR (or \fBstdin\fR if \fIfile\fR is \fB\-\fR) and queries the flow table from \fIswitch\fR. Then it fixes up any differences, adding flows from \fIflow\fR that are missing on @@ -2386,6 +2396,32 @@ depending on its configuration. 
\fB\-\-strict\fR Uses strict matching when running flow modification commands. . +.IP "\fB\-\-bundle\fR" +Execute flow mods as an OpenFlow 1.4 bundle transaction. +.RS +.IP \(bu +Within a bundle, all flow mods are processed in the order they appear +and as a single transaction, meaning that if one of them fails, the +whole transaction fails and none of the changes are made to the +\fIswitch\fR's flow table. +.IP \(bu +The beginning and the end of the flow table modification commands in a +bundle are delimited with OpenFlow 1.4 bundle control messages, which +makes it possible to stream the included commands without explicit +OpenFlow barriers, which are otherwise used after each flow table +modification command. This may make large modifications execute +faster as a bundle. +.IP \(bu +Bundles require OpenFlow 1.4 or higher. An explicit \fB-O +OpenFlow14\fR option is not needed, but you may need to enable +OpenFlow 1.4 support for OVS by setting the OVSDB \fIprotocols\fR +column in the \fIbridge\fR table. +.IP \(bu +Current implementation executes all bundles with the 'ordered' flag, +so that the flow mods are always executed in the order specified. +Atomic bundles are not yet supported. +.RE +. .so lib/ofp-version.man . .IP "\fB\-F \fIformat\fR[\fB,\fIformat\fR...]" diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 54a5bb8d0..10d44e0ef 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -67,6 +67,12 @@ VLOG_DEFINE_THIS_MODULE(ofctl); +/* --bundle: Use OpenFlow 1.4 bundle for making the flow table change atomic. + * NOTE: Also the flow mod will use OpenFlow 1.4, so the semantics may be + * different (see the comment in parse_options() for details). + */ +static bool bundle = false; + /* --strict: Use strict matching for flow mod commands? Additionally governs * use of nx_pull_match() instead of nx_pull_match_loose() in parse-nx-match. 
*/ @@ -159,6 +165,7 @@ parse_options(int argc, char *argv[]) OPT_SORT, OPT_RSORT, OPT_UNIXCTL, + OPT_BUNDLE, DAEMON_OPTION_ENUMS, OFP_VERSION_OPTION_ENUMS, VLOG_OPTION_ENUMS @@ -176,6 +183,7 @@ parse_options(int argc, char *argv[]) {"unixctl", required_argument, NULL, OPT_UNIXCTL}, {"help", no_argument, NULL, 'h'}, {"option", no_argument, NULL, 'o'}, + {"bundle", no_argument, NULL, OPT_BUNDLE}, DAEMON_LONG_OPTIONS, OFP_VERSION_LONG_OPTIONS, VLOG_LONG_OPTIONS, @@ -249,6 +257,10 @@ parse_options(int argc, char *argv[]) ovs_cmdl_print_options(long_options); exit(EXIT_SUCCESS); + case OPT_BUNDLE: + bundle = true; + break; + case OPT_STRICT: strict = true; break; @@ -293,6 +305,12 @@ parse_options(int argc, char *argv[]) free(short_options); + /* Implicit OpenFlow 1.4 with the '--bundle' option. */ + if (bundle) { + /* Add implicit allowance for OpenFlow 1.4. */ + add_allowed_ofp_versions(ofputil_protocols_to_version_bitmap( + OFPUTIL_P_OF14_OXM)); + } versions = get_allowed_ofp_versions(); version_protocols = ofputil_protocols_from_version_bitmap(versions); if (!(allowed_protocols & version_protocols)) { @@ -496,7 +514,6 @@ open_vconn(const char *name, struct vconn **vconnp) static void send_openflow_buffer(struct vconn *vconn, struct ofpbuf *buffer) { - ofpmsg_update_length(buffer); run(vconn_send_block(vconn, buffer), "failed to send packet to switch"); } @@ -505,7 +522,6 @@ dump_transaction(struct vconn *vconn, struct ofpbuf *request) { struct ofpbuf *reply; - ofpmsg_update_length(request); run(vconn_transact(vconn, request, &reply), "talking to %s", vconn_get_name(vconn)); ofp_print(stdout, reply->data, reply->size, verbosity + 1); @@ -587,11 +603,7 @@ dump_trivial_stats_transaction(const char *vconn_name, enum ofpraw raw) static void transact_multiple_noreply(struct vconn *vconn, struct ovs_list *requests) { - struct ofpbuf *request, *reply; - - LIST_FOR_EACH (request, list_node, requests) { - ofpmsg_update_length(request); - } + struct ofpbuf *reply; 
run(vconn_transact_multiple_noreply(vconn, requests, &reply), "talking to %s", vconn_get_name(vconn)); @@ -602,6 +614,20 @@ transact_multiple_noreply(struct vconn *vconn, struct ovs_list *requests) ofpbuf_delete(reply); } +static void +bundle_error_reporter(const struct ofp_header *oh) +{ + ofp_print(stderr, oh, ntohs(oh->length), verbosity + 1); + fflush(stderr); +} + +static void +bundle_transact(struct vconn *vconn, struct ovs_list *requests, uint16_t flags) +{ + run(vconn_bundle_transact(vconn, requests, flags, bundle_error_reporter), + "talking to %s", vconn_get_name(vconn)); +} + /* Sends 'request', which should be a request that only has a reply if an error * occurs, and waits for it to succeed or fail. If an error does occur, prints * it and exits with an error. @@ -1174,6 +1200,33 @@ open_vconn_for_flow_mod(const char *remote, struct vconn **vconnp, "formats (%s)", usable_s); } +static void +bundle_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, + size_t n_fms, enum ofputil_protocol usable_protocols) +{ + enum ofputil_protocol protocol; + struct vconn *vconn; + struct ovs_list requests; + size_t i; + + list_init(&requests); + + /* Bundles need OpenFlow 1.4+. 
*/ + usable_protocols &= OFPUTIL_P_OF14_UP; + protocol = open_vconn_for_flow_mod(remote, &vconn, usable_protocols); + + for (i = 0; i < n_fms; i++) { + struct ofputil_flow_mod *fm = &fms[i]; + struct ofpbuf *request = ofputil_encode_flow_mod(fm, protocol); + + list_push_back(&requests, &request->list_node); + free(CONST_CAST(struct ofpact *, fm->ofpacts)); + } + + bundle_transact(vconn, &requests, OFPBF_ORDERED); + vconn_close(vconn); +} + static void ofctl_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, size_t n_fms, enum ofputil_protocol usable_protocols) @@ -1182,6 +1235,11 @@ ofctl_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, struct vconn *vconn; size_t i; + if (bundle) { + bundle_flow_mod__(remote, fms, n_fms, usable_protocols); + return; + } + protocol = open_vconn_for_flow_mod(remote, &vconn, usable_protocols); for (i = 0; i < n_fms; i++) { @@ -1194,13 +1252,19 @@ ofctl_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, } static void -ofctl_flow_mod_file(int argc OVS_UNUSED, char *argv[], uint16_t command) +ofctl_flow_mod_file(int argc OVS_UNUSED, char *argv[], int command) { enum ofputil_protocol usable_protocols; struct ofputil_flow_mod *fms = NULL; size_t n_fms = 0; char *error; + if (command == OFPFC_ADD) { + /* Allow the file to specify a mix of commands. If none specified at + * the beginning of any given line, then the default is OFPFC_ADD, so + * this is backwards compatible. 
*/ + command = -2; + } error = parse_ofp_flow_mod_file(argv[2], command, &fms, &n_fms, &usable_protocols); if (error) { @@ -2636,7 +2700,11 @@ ofctl_replace_flows(struct ovs_cmdl_context *ctx) fte_make_flow_mod(fte, FILE_IDX, OFPFC_ADD, protocol, &requests); } } - transact_multiple_noreply(vconn, &requests); + if (bundle) { + bundle_transact(vconn, &requests, OFPBF_ORDERED); + } else { + transact_multiple_noreply(vconn, &requests); + } vconn_close(vconn); fte_free_all(&cls); From 2b7b1427c177d28ec63632646ce896eec7f162b6 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 9 Jun 2015 17:00:00 -0700 Subject: [PATCH 132/146] classifier: Support table versioning This patch allows classifier rules to become visible and invisible in specific versions. A 'version' is defined as a positive monotonically increasing integer, which never wraps around. The new 'visibility' attribute replaces the prior 'to_be_removed' and 'visible' attributes. When versioning is not used, the 'version' parameter should be passed as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when looking up flows. This feature enables the support for atomic OpenFlow bundles without significant performance penalty on 64-bit systems. There is a performance decrease in 32-bit systems due to 64-bit atomics used. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/classifier-private.h | 52 ++++++++- lib/classifier.c | 223 ++++++++++++++++++++++++--------------- lib/classifier.h | 138 ++++++++++++++++-------- lib/ovs-router.c | 7 +- lib/tnl-ports.c | 9 +- ofproto/ofproto-dpif.c | 2 +- ofproto/ofproto.c | 94 ++++++++--------- tests/test-classifier.c | 6 +- utilities/ovs-ofctl.c | 3 +- 9 files changed, 343 insertions(+), 191 deletions(-) diff --git a/lib/classifier-private.h b/lib/classifier-private.h index a7edbe93b..2703b75a7 100644 --- a/lib/classifier-private.h +++ b/lib/classifier-private.h @@ -79,13 +79,63 @@ struct cls_match { * 'indices'. */ /* Accessed by all readers. 
*/ struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */ - bool visible; + + /* Controls rule's visibility to lookups. + * + * When 'visibility' is: + * + * > 0 - rule is visible starting from version 'visibility' + * <= 0 - rule is invisible starting from version '-(visibility)' + * + * The minimum version number used in lookups is 1 (== CLS_NO_VERSION), + * which implies that when 'visibility' is: + * + * 1 - rule is visible in all lookup versions + * 0 - rule is invisible in all lookup versions. */ + atomic_llong visibility; + const struct cls_rule *cls_rule; OVSRCU_TYPE(struct cls_conjunction_set *) conj_set; const struct miniflow flow; /* Matching rule. Mask is in the subtable. */ /* 'flow' must be the last field. */ }; +static inline void +cls_match_set_visibility(struct cls_match *rule, long long version) +{ + atomic_store_relaxed(&rule->visibility, version); +} + +static inline bool +cls_match_visible_in_version(const struct cls_match *rule, long long version) +{ + long long visibility; + + /* C11 does not want to access an atomic via a const object pointer. */ + atomic_read_relaxed(&CONST_CAST(struct cls_match *, rule)->visibility, + &visibility); + + if (OVS_LIKELY(visibility > 0)) { + /* Rule is visible starting from version 'visibility'. */ + return version >= visibility; + } else { + /* Rule is invisible starting from version '-visibility'. */ + return version < -visibility; + } +} + +static inline bool +cls_match_is_eventually_invisible(const struct cls_match *rule) +{ + long long visibility; + + /* C11 does not want to access an atomic via a const object pointer. */ + atomic_read_relaxed(&CONST_CAST(struct cls_match *, rule)->visibility, + &visibility); + + return visibility <= 0; +} + /* A longest-prefix match tree. */ struct trie_node { uint32_t prefix; /* Prefix bits for this node, MSB first. 
*/ diff --git a/lib/classifier.c b/lib/classifier.c index 6075cf785..2b2d3f646 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -99,7 +99,7 @@ cls_match_alloc(const struct cls_rule *rule, rculist_init(&cls_match->list); *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule; *CONST_CAST(int *, &cls_match->priority) = rule->priority; - cls_match->visible = false; + atomic_init(&cls_match->visibility, 0); /* Initially invisible. */ miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow), &rule->match.flow, count); ovsrcu_set_hidden(&cls_match->conj_set, @@ -115,6 +115,7 @@ static struct cls_subtable *insert_subtable(struct classifier *cls, static void destroy_subtable(struct classifier *cls, struct cls_subtable *); static const struct cls_match *find_match_wc(const struct cls_subtable *, + long long version, const struct flow *, struct trie_ctx *, unsigned int n_tries, @@ -139,12 +140,12 @@ next_rule_in_list(const struct cls_match *rule, const struct cls_match *head) /* Return the next lower-priority rule in the list that is visible. Multiple * identical rules with the same priority may exist transitionally. In that - * case the first rule of a given priority has been marked as 'to_be_removed', - * and the later rules are marked as '!visible'. This gets a bit complex if - * there are two rules of the same priority in the list, as in that case the - * head and tail of the list will have the same priority. */ + * case the first rule of a given priority has been marked as visible in one + * version and the later rules are marked as visible on the other version. + * This makes it possible for the head and tail of the list to have the same + * priority. 
*/ static inline const struct cls_match * -next_visible_rule_in_list(const struct cls_match *rule) +next_visible_rule_in_list(const struct cls_match *rule, long long version) { const struct cls_match *next = rule; @@ -154,7 +155,7 @@ next_visible_rule_in_list(const struct cls_match *rule) /* We have reached the head of the list, stop. */ return NULL; } - } while (!next->visible); + } while (!cls_match_visible_in_version(next, version)); return next; } @@ -206,11 +207,14 @@ static bool mask_prefix_bits_set(const struct flow_wildcards *, /* cls_rule. */ static inline void -cls_rule_init__(struct cls_rule *rule, unsigned int priority) +cls_rule_init__(struct cls_rule *rule, unsigned int priority, + long long version) { + ovs_assert(version > 0); + rculist_init(&rule->node); - rule->priority = priority; - rule->to_be_removed = false; + *CONST_CAST(int *, &rule->priority) = priority; + *CONST_CAST(long long *, &rule->version) = version; rule->cls_match = NULL; } @@ -223,19 +227,21 @@ cls_rule_init__(struct cls_rule *rule, unsigned int priority) * Clients should not use priority INT_MIN. (OpenFlow uses priorities between * 0 and UINT16_MAX, inclusive.) */ void -cls_rule_init(struct cls_rule *rule, const struct match *match, int priority) +cls_rule_init(struct cls_rule *rule, const struct match *match, int priority, + long long version) { - cls_rule_init__(rule, priority); - minimatch_init(&rule->match, match); + cls_rule_init__(rule, priority, version); + minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match); } /* Same as cls_rule_init() for initialization from a "struct minimatch". 
*/ void cls_rule_init_from_minimatch(struct cls_rule *rule, - const struct minimatch *match, int priority) + const struct minimatch *match, int priority, + long long version) { - cls_rule_init__(rule, priority); - minimatch_clone(&rule->match, match); + cls_rule_init__(rule, priority, version); + minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match); } /* Initializes 'dst' as a copy of 'src'. @@ -244,20 +250,21 @@ cls_rule_init_from_minimatch(struct cls_rule *rule, void cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) { - cls_rule_init__(dst, src->priority); - minimatch_clone(&dst->match, &src->match); + cls_rule_init__(dst, src->priority, src->version); + minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match); } /* Initializes 'dst' with the data in 'src', destroying 'src'. + * * 'src' must be a cls_rule NOT in a classifier. * * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ void cls_rule_move(struct cls_rule *dst, struct cls_rule *src) { - ovs_assert(!src->cls_match); /* Must not be in a classifier. */ - cls_rule_init__(dst, src->priority); - minimatch_move(&dst->match, &src->match); + cls_rule_init__(dst, src->priority, src->version); + minimatch_move(CONST_CAST(struct minimatch *, &dst->match), + CONST_CAST(struct minimatch *, &src->match)); } /* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's @@ -275,7 +282,7 @@ cls_rule_destroy(struct cls_rule *rule) ovs_assert(rculist_next_protected(&rule->node) == RCULIST_POISON || rculist_is_empty(&rule->node)); - minimatch_destroy(&rule->match); + minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match)); } void @@ -327,15 +334,53 @@ cls_rule_is_catchall(const struct cls_rule *rule) return minimask_is_catchall(&rule->match.mask); } -/* Rules inserted during classifier_defer() need to be made visible before - * calling classifier_publish(). +/* Makes rule invisible after 'version'. 
Once that version is made invisible + * (by changing the version parameter used in lookups), the rule should be + * actually removed via ovsrcu_postpone(). * - * 'rule' must be in a classifier. */ -void cls_rule_make_visible(const struct cls_rule *rule) + * 'rule_' must be in a classifier. */ +void +cls_rule_make_invisible_in_version(const struct cls_rule *rule_, + long long version, long long lookup_version) { - rule->cls_match->visible = true; + struct cls_match *rule = rule_->cls_match; + + /* XXX: Adjust when versioning is actually used. */ + ovs_assert(version >= rule_->version && version >= lookup_version); + + /* Normally, we call this when deleting a rule that is already visible to + * lookups. However, sometimes a bundle transaction will add a rule and + * then delete it before the rule has ever become visible. If we set such + * a rule to become invisible in a future 'version', it would become + * visible to all prior versions. So, in this case we must set the rule + * visibility to 0 (== never visible). */ + if (cls_match_visible_in_version(rule, lookup_version)) { + /* Make invisible starting at 'version'. */ + atomic_store_relaxed(&rule->visibility, -version); + } else { + /* Rule has not yet been visible to lookups, make invisible in all + * versions. */ + atomic_store_relaxed(&rule->visibility, 0); + } } +/* This undoes the change made by cls_rule_make_invisible_in_version(). + * + * 'rule' must be in a classifier. */ +void +cls_rule_restore_visibility(const struct cls_rule *rule) +{ + atomic_store_relaxed(&rule->cls_match->visibility, rule->version); +} + +/* Return true if 'rule' is visible in 'version'. + * + * 'rule' must be in a classifier. */ +bool +cls_rule_visible_in_version(const struct cls_rule *rule, long long version) +{ + return cls_match_visible_in_version(rule->cls_match, version); +} /* Initializes 'cls' as a classifier that initially contains no classification * rules.
*/ @@ -597,7 +642,7 @@ const struct cls_rule * classifier_replace(struct classifier *cls, const struct cls_rule *rule, const struct cls_conjunction *conjs, size_t n_conjs) { - struct cls_match *new = cls_match_alloc(rule, conjs, n_conjs); + struct cls_match *new; struct cls_subtable *subtable; uint32_t ihash[CLS_MAX_INDICES]; uint8_t prev_be64ofs = 0; @@ -607,6 +652,11 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, uint32_t hash; int i; + ovs_assert(rule->version > 0); + + /* 'new' is initially invisible to lookups. */ + new = cls_match_alloc(rule, conjs, n_conjs); + CONST_CAST(struct cls_rule *, rule)->cls_match = new; subtable = find_subtable(cls, &rule->match.mask); @@ -673,12 +723,12 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, struct cls_match *iter; /* Scan the list for the insertion point that will keep the list in - * order of decreasing priority. - * Insert after 'to_be_removed' rules of the same priority. */ + * order of decreasing priority. Insert after rules marked invisible + * in any version of the same priority. */ FOR_EACH_RULE_IN_LIST_PROTECTED (iter, head) { if (rule->priority > iter->priority || (rule->priority == iter->priority - && !iter->cls_rule->to_be_removed)) { + && !cls_match_is_eventually_invisible(iter))) { break; } } @@ -716,8 +766,8 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, /* No change in subtable's max priority or max count. */ - /* Make rule visible to lookups? */ - new->visible = cls->publish; + /* Make 'new' visible to lookups in the appropriate version. */ + cls_match_set_visibility(new, rule->version); /* Make rule visible to iterators (immediately). */ rculist_replace(CONST_CAST(struct rculist *, &rule->node), @@ -732,8 +782,8 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, } } - /* Make rule visible to lookups? */ - new->visible = cls->publish; + /* Make 'new' visible to lookups in the appropriate version. 
*/ + cls_match_set_visibility(new, rule->version); /* Make rule visible to iterators (immediately). */ rculist_push_back(&subtable->rules_list, @@ -1026,8 +1076,9 @@ free_conjunctive_matches(struct hmap *matches, * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. */ static const struct cls_rule * -classifier_lookup__(const struct classifier *cls, struct flow *flow, - struct flow_wildcards *wc, bool allow_conjunctive_matches) +classifier_lookup__(const struct classifier *cls, long long version, + struct flow *flow, struct flow_wildcards *wc, + bool allow_conjunctive_matches) { const struct cls_partition *partition; struct trie_ctx trie_ctx[CLS_MAX_TRIES]; @@ -1094,7 +1145,8 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, /* Skip subtables with no match, or where the match is lower-priority * than some certain match we've already found. */ - match = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc); + match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries, + wc); if (!match || match->priority <= hard_pri) { continue; } @@ -1218,7 +1270,7 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, const struct cls_rule *rule; flow->conj_id = id; - rule = classifier_lookup__(cls, flow, wc, false); + rule = classifier_lookup__(cls, version, flow, wc, false); flow->conj_id = saved_conj_id; if (rule) { @@ -1246,7 +1298,7 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, } /* Find next-lower-priority flow with identical flow match. */ - match = next_visible_rule_in_list(soft[i]->match); + match = next_visible_rule_in_list(soft[i]->match, version); if (match) { soft[i] = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set); @@ -1271,9 +1323,10 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, return hard ? 
hard->cls_rule : NULL; } -/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. - * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules - * of equal priority match 'flow', returns one arbitrarily. +/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and + * that is visible in 'version'. Returns a null pointer if no rules in 'cls' + * match 'flow'. If multiple rules of equal priority match 'flow', returns one + * arbitrarily. * * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the * set of bits that were significant in the lookup. At some point @@ -1283,18 +1336,16 @@ classifier_lookup__(const struct classifier *cls, struct flow *flow, * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. */ const struct cls_rule * -classifier_lookup(const struct classifier *cls, struct flow *flow, - struct flow_wildcards *wc) +classifier_lookup(const struct classifier *cls, long long version, + struct flow *flow, struct flow_wildcards *wc) { - return classifier_lookup__(cls, flow, wc, true); + return classifier_lookup__(cls, version, flow, wc, true); } /* Finds and returns a rule in 'cls' with exactly the same priority and - * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't - * contain an exact match. - * - * Returns the first matching rule that is not 'to_be_removed'. Only one such - * rule may exist. */ + * matching criteria as 'target', and that is visible in 'target->version'. + * Only one such rule may ever exist. Returns a null pointer if 'cls' doesn't + * contain an exact match. */ const struct cls_rule * classifier_find_rule_exactly(const struct classifier *cls, const struct cls_rule *target) @@ -1318,7 +1369,7 @@ classifier_find_rule_exactly(const struct classifier *cls, break; /* Not found.
*/ } if (rule->priority == target->priority - && !rule->cls_rule->to_be_removed) { + && cls_match_visible_in_version(rule, target->version)) { return rule->cls_rule; } } @@ -1326,16 +1377,18 @@ classifier_find_rule_exactly(const struct classifier *cls, } /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the - * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't - * contain an exact match. */ + * same matching criteria as 'target', and that is visible in 'version'. + * Returns a null pointer if 'cls' doesn't contain an exact match visible in + * 'version'. */ const struct cls_rule * classifier_find_match_exactly(const struct classifier *cls, - const struct match *target, int priority) + const struct match *target, int priority, + long long version) { const struct cls_rule *retval; struct cls_rule cr; - cls_rule_init(&cr, target, priority); + cls_rule_init(&cr, target, priority, version); retval = classifier_find_rule_exactly(cls, &cr); cls_rule_destroy(&cr); @@ -1344,16 +1397,12 @@ classifier_find_match_exactly(const struct classifier *cls, /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are * considered to overlap if both rules have the same priority and a packet - * could match both. + * could match both, and if both rules are visible in the same version. * * A trivial example of overlapping rules is two rules matching disjoint sets * of fields. E.g., if one rule matches only on port number, while another only * on dl_type, any packet from that specific port and with that specific - * dl_type could match both, if the rules also have the same priority. - * - * 'target' is not considered to overlap with a rule that has been marked - * as 'to_be_removed'. - */ + * dl_type could match both, if the rules also have the same priority. 
*/ bool classifier_rule_overlaps(const struct classifier *cls, const struct cls_rule *target) @@ -1371,9 +1420,10 @@ classifier_rule_overlaps(const struct classifier *cls, RCULIST_FOR_EACH (rule, node, &subtable->rules_list) { if (rule->priority == target->priority - && !rule->to_be_removed && miniflow_equal_in_minimask(&target->match.flow, - &rule->match.flow, &mask)) { + &rule->match.flow, &mask) + && cls_match_visible_in_version(rule->cls_match, + target->version)) { return true; } } @@ -1425,16 +1475,17 @@ cls_rule_is_loose_match(const struct cls_rule *rule, /* Iteration. */ +/* Rule may only match a target if it is visible in target's version. For NULL + * target we only return rules that are not invisible in any version. */ static bool rule_matches(const struct cls_rule *rule, const struct cls_rule *target) { - /* Iterators never see rules that have been marked for removal. - * This allows them to be oblivious of duplicate rules. */ - return (!rule->to_be_removed && - (!target - || miniflow_equal_in_minimask(&rule->match.flow, - &target->match.flow, - &target->match.mask))); + /* Iterators never see duplicate rules with the same priority. */ + return target + ? (miniflow_equal_in_minimask(&rule->match.flow, &target->match.flow, + &target->match.mask) + && cls_match_visible_in_version(rule->cls_match, target->version)) + : !cls_match_is_eventually_invisible(rule->cls_match); } static const struct cls_rule * @@ -1457,10 +1508,13 @@ search_subtable(const struct cls_subtable *subtable, /* Initializes 'cursor' for iterating through rules in 'cls', and returns the * first matching cls_rule via '*pnode', or NULL if there are no matches. * - * - If 'target' is null, the cursor will visit every rule in 'cls'. + * - If 'target' is null, or if the 'target' is a catchall target and the + * target's version is CLS_MAX_VERSION, the cursor will visit every rule + * in 'cls' that is not invisible in any version.
* * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' - * such that cls_rule_is_loose_match(rule, target) returns true. + * such that cls_rule_is_loose_match(rule, target) returns true and that + * the rule is visible in 'target->version'. * * Ignores target->priority. */ struct cls_cursor @@ -1470,7 +1524,9 @@ cls_cursor_start(const struct classifier *cls, const struct cls_rule *target) struct cls_subtable *subtable; cursor.cls = cls; - cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL; + cursor.target = target && (!cls_rule_is_catchall(target) + || target->version != CLS_MAX_VERSION) + ? target : NULL; cursor.rule = NULL; /* Find first rule. */ @@ -1722,8 +1778,8 @@ miniflow_and_mask_matches_flow(const struct miniflow *flow, } static inline const struct cls_match * -find_match(const struct cls_subtable *subtable, const struct flow *flow, - uint32_t hash) +find_match(const struct cls_subtable *subtable, long long version, + const struct flow *flow, uint32_t hash) { const struct cls_match *head, *rule; @@ -1733,7 +1789,7 @@ find_match(const struct cls_subtable *subtable, const struct flow *flow, flow))) { /* Return highest priority rule that is visible. 
*/ FOR_EACH_RULE_IN_LIST(rule, head) { - if (OVS_LIKELY(rule->visible)) { + if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) { return rule; } } @@ -1791,9 +1847,9 @@ fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc, } static const struct cls_match * -find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, - struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, - struct flow_wildcards *wc) +find_match_wc(const struct cls_subtable *subtable, long long version, + const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES], + unsigned int n_tries, struct flow_wildcards *wc) { uint32_t basis = 0, hash; const struct cls_match *rule = NULL; @@ -1801,7 +1857,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, struct range ofs; if (OVS_UNLIKELY(!wc)) { - return find_match(subtable, flow, + return find_match(subtable, version, flow, flow_hash_in_minimask(flow, &subtable->mask, 0)); } @@ -1842,7 +1898,8 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, flow, wc)) { /* Return highest priority rule that is visible. */ FOR_EACH_RULE_IN_LIST(rule, head) { - if (OVS_LIKELY(rule->visible)) { + if (OVS_LIKELY(cls_match_visible_in_version(rule, + version))) { return rule; } } @@ -1859,7 +1916,7 @@ find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, } hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, ofs.end, &basis); - rule = find_match(subtable, flow, hash); + rule = find_match(subtable, version, flow, hash); if (!rule && subtable->ports_mask_len) { /* Ports are always part of the final range, if any. * No match was found for the ports. Use the ports trie to figure out diff --git a/lib/classifier.h b/lib/classifier.h index d69c20191..cb0030abd 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -210,46 +210,98 @@ * Each eliminated subtable lookup also reduces the amount of un-wildcarding. 
* * - * Tentative Modifications - * ======================= + * Classifier Versioning + * ===================== * - * When a new rule is added to a classifier, it can optionally be "invisible". - * That means that lookups won't find the rule, although iterations through - * the classifier will see it. + * Classifier lookups are always done in a specific classifier version, where + * a version is defined to be a natural number. * - * Similarly, deletions from a classifier can be "tentative", by setting - * 'to_be_removed' to true within the rule. A rule that is tentatively deleted - * will not appear in iterations, although it will still be found by lookups. + * When a new rule is added to a classifier, it is set to become visible in a + * specific version. If the version number used at insert time is larger than + * any version number currently used in lookups, the new rule is said to be + * invisible to lookups. This means that lookups won't find the rule, but the + * rule is immediately available to classifier iterations. + * + * Similarly, a rule can be marked as to be deleted in a future version, or + * more precisely, to be visible upto a given version number. To delete a rule + * in a way to not remove the rule before all ongoing lookups are finished, the + * rule should be marked as "to be deleted" by setting the rule's visibility to + * the negation of the last version number in which it should be visible. + * Then, when all the lookups use a later version number, the rule can be + * actually deleted from the classifier. A rule that is marked for deletion + * after a future version will not appear in iterations, although it will still + * be found by lookups using a lookup version number up to that future version + * number. 
* * Classifiers can hold duplicate rules (rules with the same match criteria and - * priority) when tentative modifications are involved: one (or more) identical - * tentatively deleted rules can coexist in a classifier with at most one - * identical invisible rule. + * priority) when at most one of the duplicates with the same priority is + * visible in any given lookup version. The caller responsible for classifier + * modifications must maintain this invariant. * - * The classifier supports tentative modifications for two reasons: + * The classifier supports versioning for two reasons: * - * 1. Performance: Adding (or deleting) a rule can, in pathological cases, - * have a cost proportional to the number of rules already in the - * classifier. When multiple rules are being added (or deleted) in one - * go, though, this cost can be paid just once, not once per addition - * (or deletion), as long as it is OK for any new rules to be invisible - * until the batch change is complete. + * 1. Support for versioned modifications makes it possible to perform an + * arbitrary series of classifier changes as one atomic transaction, + * where intermediate versions of the classifier are not visible to any + * lookups. Also, when a rule is added for a future version, or marked + * for removal after the current version, such modifications can be + * reverted without any visible effects to any of the current lookups. * - * 2. Staging additions and deletions: Invisibility allows a rule to be - * added tentatively, to possibly be modified or removed before it - * becomes visible. Tentatively deletion allows a rule to be scheduled - * for deletion before it is certain that the deletion is desirable. + * 2. Performance: Adding (or deleting) a large set of rules can, in + * pathological cases, have a cost proportional to the number of rules + * already in the classifier.
When multiple rules are being added (or + * deleted) in one go, though, this pathological case cost can be + * typically avoided, as long as it is OK for any new rules to be + * invisible until the batch change is complete. + * + * Note that the classifier_replace() function replaces a rule immediately, and + * is therefore not safe to use with versioning. It is still available for the + * users that do not use versioning. + * + * + * Deferred Publication + * ==================== + * + * Removing a large number of rules from a classifier can be costly, as the + * supporting data structures are torn down, in many cases just to be + * re-instantiated right after. In the worst case, as when each rule has a + * different match pattern (mask), the maintenance of the match patterns can + * have cost O(N^2), where N is the number of different match patterns. To + * alleviate this, the classifier supports a "deferred mode", in which changes + * in internal data structures needed for future version lookups may not be + * fully computed yet. The computation is finalized when the deferred mode is + * turned off. + * + * This feature can be used with versioning such that all changes to future + * versions are made in the deferred mode. Then, right before making the new + * version visible to lookups, the deferred mode is turned off so that all the + * data structures are ready for lookups with the new version number. * * To use deferred publication, first call classifier_defer(). Then, modify - * the classifier via additions and deletions. Call cls_rule_make_visible() on - * each new rule at an appropriate time. Finally, call classifier_publish(). + * the classifier via additions (classifier_insert() with a specific, future + * version number) and deletions (use cls_rule_make_invisible_in_version()). + * Then call classifier_publish(), and after that, announce the new version + * number to be used in lookups.
* * * Thread-safety * ============= * - * The classifier may safely be accessed by many reader threads concurrently or - * by a single writer. */ + * The classifier may safely be accessed by many reader threads concurrently + * and by a single writer, or by multiple writers when they guarantee mutually + * exclusive access to classifier modifications. + * + * Since the classifier rules are RCU protected, the rule destruction after + * removal from the classifier must be RCU postponed. Also, when versioning is + * used, the rule removal itself needs to be typically RCU postponed. In this + * case the rule destruction is doubly RCU postponed, i.e., the second + * ovsrcu_postpone() call to destruct the rule is called from the first RCU + * callback that removes the rule. + * + * Rules that have never been visible to lookups are an exception to the above + * rule. Such rules can be removed immediately, but their destruction must + * still be RCU postponed, as the rule's visibility attribute may be examined + * parallel to the rule's removal. */ #include "cmap.h" #include "match.h" @@ -275,6 +327,8 @@ struct cls_trie { }; enum { + CLS_MIN_VERSION = 1, /* Default version number to use. */ + CLS_MAX_VERSION = LLONG_MAX, /* Last possible version number. */ CLS_MAX_INDICES = 3, /* Maximum number of lookup indices per subtable. */ CLS_MAX_TRIES = 3 /* Maximum number of prefix trees per classifier. */ }; @@ -301,22 +355,17 @@ struct cls_conjunction { /* A rule to be inserted to the classifier. */ struct cls_rule { - struct rculist node; /* In struct cls_subtable 'rules_list'. */ - int priority; /* Larger numbers are higher priorities. */ - bool to_be_removed; /* Rule will be deleted. - * This is the only field that may be - * modified after the rule has been added to - * a classifier. Modifications are to be - * done only under same locking as all other - * classifier modifications. This field may - * not be examined by lookups.
*/ - struct cls_match *cls_match; /* NULL if not in a classifier. */ - struct minimatch match; /* Matching rule. */ + struct rculist node; /* In struct cls_subtable 'rules_list'. */ + const int priority; /* Larger numbers are higher priorities. */ + const long long version; /* Version in which the rule was added. */ + struct cls_match *cls_match; /* NULL if not in a classifier. */ + const struct minimatch match; /* Matching rule. */ }; -void cls_rule_init(struct cls_rule *, const struct match *, int priority); +void cls_rule_init(struct cls_rule *, const struct match *, int priority, + long long version); void cls_rule_init_from_minimatch(struct cls_rule *, const struct minimatch *, - int priority); + int priority, long long version); void cls_rule_clone(struct cls_rule *, const struct cls_rule *); void cls_rule_move(struct cls_rule *dst, struct cls_rule *src); void cls_rule_destroy(struct cls_rule *); @@ -330,7 +379,11 @@ void cls_rule_format(const struct cls_rule *, struct ds *); bool cls_rule_is_catchall(const struct cls_rule *); bool cls_rule_is_loose_match(const struct cls_rule *rule, const struct minimatch *criteria); -void cls_rule_make_visible(const struct cls_rule *rule); +bool cls_rule_visible_in_version(const struct cls_rule *, long long version); +void cls_rule_make_invisible_in_version(const struct cls_rule *, + long long version, + long long lookup_version); +void cls_rule_restore_visibility(const struct cls_rule *); /* Constructor/destructor. Must run single-threaded. */ void classifier_init(struct classifier *, const uint8_t *flow_segments); @@ -354,7 +407,7 @@ static inline void classifier_publish(struct classifier *); /* Lookups. These are RCU protected and may run concurrently with modifiers * and each other. 
*/ const struct cls_rule *classifier_lookup(const struct classifier *, - struct flow *, + long long version, struct flow *, struct flow_wildcards *); bool classifier_rule_overlaps(const struct classifier *, const struct cls_rule *); @@ -362,7 +415,8 @@ const struct cls_rule *classifier_find_rule_exactly(const struct classifier *, const struct cls_rule *); const struct cls_rule *classifier_find_match_exactly(const struct classifier *, const struct match *, - int priority); + int priority, + long long version); bool classifier_is_empty(const struct classifier *); int classifier_count(const struct classifier *); diff --git a/lib/ovs-router.c b/lib/ovs-router.c index bf205d6b4..532487e8f 100644 --- a/lib/ovs-router.c +++ b/lib/ovs-router.c @@ -68,7 +68,7 @@ ovs_router_lookup(ovs_be32 ip_dst, char output_bridge[], ovs_be32 *gw) const struct cls_rule *cr; struct flow flow = {.nw_dst = ip_dst}; - cr = classifier_lookup(&cls, &flow, NULL); + cr = classifier_lookup(&cls, CLS_MAX_VERSION, &flow, NULL); if (cr) { struct ovs_router_entry *p = ovs_router_entry_cast(cr); @@ -115,7 +115,8 @@ ovs_router_insert__(uint8_t priority, ovs_be32 ip_dst, uint8_t plen, p->nw_addr = match.flow.nw_dst; p->plen = plen; p->priority = priority; - cls_rule_init(&p->cr, &match, priority); /* Longest prefix matches first. */ + /* Longest prefix matches first. */ + cls_rule_init(&p->cr, &match, priority, CLS_MIN_VERSION); ovs_mutex_lock(&mutex); cr = classifier_replace(&cls, &p->cr, NULL, 0); @@ -144,7 +145,7 @@ rt_entry_delete(uint8_t priority, ovs_be32 ip_dst, uint8_t plen) rt_init_match(&match, ip_dst, plen); - cls_rule_init(&rule, &match, priority); + cls_rule_init(&rule, &match, priority, CLS_MIN_VERSION); /* Find the exact rule. 
*/ cr = classifier_find_rule_exactly(&cls, &rule); diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c index 759d6ba33..2602db543 100644 --- a/lib/tnl-ports.c +++ b/lib/tnl-ports.c @@ -84,7 +84,7 @@ tnl_port_map_insert(odp_port_t port, ovs_be32 ip_dst, ovs_be16 udp_port, ovs_mutex_lock(&mutex); do { - cr = classifier_lookup(&cls, &match.flow, NULL); + cr = classifier_lookup(&cls, CLS_MAX_VERSION, &match.flow, NULL); p = tnl_port_cast(cr); /* Try again if the rule was released before we get the reference. */ } while (p && !ovs_refcount_try_ref_rcu(&p->ref_cnt)); @@ -99,7 +99,7 @@ tnl_port_map_insert(odp_port_t port, ovs_be32 ip_dst, ovs_be16 udp_port, match.wc.masks.tp_dst = OVS_BE16_MAX; match.wc.masks.nw_src = OVS_BE32_MAX; - cls_rule_init(&p->cr, &match, 0); /* Priority == 0. */ + cls_rule_init(&p->cr, &match, 0, CLS_MIN_VERSION); /* Priority == 0. */ ovs_refcount_init(&p->ref_cnt); ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name); @@ -130,7 +130,7 @@ tnl_port_map_delete(ovs_be32 ip_dst, ovs_be16 udp_port) tnl_port_init_flow(&flow, ip_dst, udp_port); - cr = classifier_lookup(&cls, &flow, NULL); + cr = classifier_lookup(&cls, CLS_MAX_VERSION, &flow, NULL); tnl_port_unref(cr); } @@ -139,7 +139,8 @@ tnl_port_map_delete(ovs_be32 ip_dst, ovs_be16 udp_port) odp_port_t tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc) { - const struct cls_rule *cr = classifier_lookup(&cls, flow, wc); + const struct cls_rule *cr = classifier_lookup(&cls, CLS_MAX_VERSION, flow, + wc); return (cr) ? 
tnl_port_cast(cr)->portno : ODPP_NONE; } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index c4cafe00b..81beca049 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3725,7 +3725,7 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, struct rule_dpif *rule; do { - cls_rule = classifier_lookup(cls, flow, wc); + cls_rule = classifier_lookup(cls, CLS_MAX_VERSION, flow, wc); rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index f2e9557c6..b5424b9f4 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -153,6 +153,7 @@ struct rule_criteria { static void rule_criteria_init(struct rule_criteria *, uint8_t table_id, const struct match *match, int priority, + long long version, ovs_be64 cookie, ovs_be64 cookie_mask, ofp_port_t out_port, uint32_t out_group); static void rule_criteria_require_rw(struct rule_criteria *, @@ -2043,7 +2044,8 @@ ofproto_add_flow(struct ofproto *ofproto, const struct match *match, /* First do a cheap check whether the rule we're looking for already exists * with the actions that we want. If it does, then we're done. */ rule = rule_from_cls_rule(classifier_find_match_exactly( - &ofproto->tables[0].cls, match, priority)); + &ofproto->tables[0].cls, match, priority, + CLS_MAX_VERSION)); if (rule) { const struct rule_actions *actions = rule_get_actions(rule); must_add = !ofpacts_equal(actions->ofpacts, actions->ofpacts_len, @@ -2080,9 +2082,9 @@ ofproto_flow_mod(struct ofproto *ofproto, struct ofputil_flow_mod *fm) struct rule *rule; bool done = false; - rule = rule_from_cls_rule(classifier_find_match_exactly(&table->cls, - &fm->match, - fm->priority)); + rule = rule_from_cls_rule(classifier_find_match_exactly( + &table->cls, &fm->match, + fm->priority, CLS_MAX_VERSION)); if (rule) { /* Reading many of the rule fields and writing on 'modified' * requires the rule->mutex. 
Also, rule->actions may change @@ -2129,7 +2131,8 @@ ofproto_delete_flow(struct ofproto *ofproto, /* First do a cheap check whether the rule we're looking for has already * been deleted. If so, then we're done. */ rule = rule_from_cls_rule(classifier_find_match_exactly(cls, target, - priority)); + priority, + CLS_MAX_VERSION)); if (!rule) { return; } @@ -3038,7 +3041,7 @@ learned_cookies_flush(struct ofproto *ofproto, struct ovs_list *dead_cookies) struct match match; match_init_catchall(&match); - rule_criteria_init(&criteria, c->table_id, &match, 0, + rule_criteria_init(&criteria, c->table_id, &match, 0, CLS_MAX_VERSION, c->cookie, OVS_BE64_MAX, OFPP_ANY, OFPG_ANY); rule_criteria_require_rw(&criteria, false); collect_rules_loose(ofproto, &criteria, &rules); @@ -3676,12 +3679,12 @@ next_matching_table(const struct ofproto *ofproto, * supplied as 0. */ static void rule_criteria_init(struct rule_criteria *criteria, uint8_t table_id, - const struct match *match, int priority, + const struct match *match, int priority, long long version, ovs_be64 cookie, ovs_be64 cookie_mask, ofp_port_t out_port, uint32_t out_group) { criteria->table_id = table_id; - cls_rule_init(&criteria->cr, match, priority); + cls_rule_init(&criteria->cr, match, priority, version); criteria->cookie = cookie; criteria->cookie_mask = cookie_mask; criteria->out_port = out_port; @@ -3785,7 +3788,7 @@ rule_collection_destroy(struct rule_collection *rules) * function verifies most of the criteria in 'c' itself, but the caller must * check 'c->cr' itself. * - * Rules that have already been marked as 'to_be_removed' are not collected. + * Rules that have already been marked for removal are not collected. * * Increments '*n_readonly' if 'rule' wasn't added because it's read-only (and * 'c' only includes modifiable rules). 
*/ @@ -3799,7 +3802,7 @@ collect_rule(struct rule *rule, const struct rule_criteria *c, && ofproto_rule_has_out_group(rule, c->out_group) && !((rule->flow_cookie ^ c->cookie) & c->cookie_mask) && (!rule_is_hidden(rule) || c->include_hidden) - && !rule->cr.to_be_removed) { + && cls_rule_visible_in_version(&rule->cr, c->cr.version)) { /* Rule matches all the criteria... */ if (!rule_is_readonly(rule) || c->include_readonly) { /* ...add it. */ @@ -3951,8 +3954,9 @@ handle_flow_stats_request(struct ofconn *ofconn, return error; } - rule_criteria_init(&criteria, fsr.table_id, &fsr.match, 0, fsr.cookie, - fsr.cookie_mask, fsr.out_port, fsr.out_group); + rule_criteria_init(&criteria, fsr.table_id, &fsr.match, 0, CLS_MAX_VERSION, + fsr.cookie, fsr.cookie_mask, fsr.out_port, + fsr.out_group); ovs_mutex_lock(&ofproto_mutex); error = collect_rules_loose(ofproto, &criteria, &rules); @@ -4115,7 +4119,7 @@ handle_aggregate_stats_request(struct ofconn *ofconn, } rule_criteria_init(&criteria, request.table_id, &request.match, 0, - request.cookie, request.cookie_mask, + CLS_MAX_VERSION, request.cookie, request.cookie_mask, request.out_port, request.out_group); ovs_mutex_lock(&ofproto_mutex); @@ -4404,10 +4408,10 @@ add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return OFPERR_OFPBRC_EPERM; } - cls_rule_init(&cr, &fm->match, fm->priority); + cls_rule_init(&cr, &fm->match, fm->priority, CLS_MIN_VERSION); /* Check for the existence of an identical rule. - * This will not return rules earlier marked as 'to_be_removed'. */ + * This will not return rules earlier marked for removal. */ rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); if (rule) { /* Transform "add" into "modify" of an existing identical flow. 
*/ @@ -4506,7 +4510,6 @@ add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } else { struct oftable *table = &ofproto->tables[rule->table_id]; - cls_rule_make_visible(&rule->cr); classifier_publish(&table->cls); learned_cookies_inc(ofproto, rule_get_actions(rule)); @@ -4573,7 +4576,7 @@ modify_flows_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return 0; } -/* Modifies the 'rule', changing them to match 'fm'. */ +/* Modifies the 'rule', changing it to match 'fm'. */ static void modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, struct rule *rule, @@ -4742,7 +4745,7 @@ modify_flows_start_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, struct rule_criteria criteria; enum ofperr error; - rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, + rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, CLS_MAX_VERSION, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); @@ -4796,7 +4799,8 @@ modify_flow_start_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, - fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); + CLS_MAX_VERSION, fm->cookie, fm->cookie_mask, OFPP_ANY, + OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); error = collect_rules_strict(ofproto, &criteria, rules); @@ -4826,11 +4830,12 @@ delete_flows__(const struct rule_collection *rules, struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); struct ofproto *ofproto = rules->rules[0]->ofproto; struct rule *rule, *next; + uint8_t prev_table = UINT8_MAX; size_t i; for (i = 0, next = rules->rules[0]; rule = next, next = (++i < rules->n) ? 
rules->rules[i] : NULL, - rule; ) { + rule; prev_table = rule->table_id) { struct classifier *cls = &ofproto->tables[rule->table_id].cls; uint8_t next_table = next ? next->table_id : UINT8_MAX; @@ -4840,7 +4845,8 @@ delete_flows__(const struct rule_collection *rules, req ? req->ofconn : NULL, req ? req->request->xid : 0, NULL); - if (next_table == rule->table_id) { + /* Defer once for each new table. */ + if (rule->table_id != prev_table) { classifier_defer(cls); } if (!classifier_remove(cls, &rule->cr)) { @@ -4873,7 +4879,7 @@ delete_flows_start_loose(struct ofproto *ofproto, struct rule_criteria criteria; enum ofperr error; - rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, + rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, CLS_MAX_VERSION, fm->cookie, fm->cookie_mask, fm->out_port, fm->out_group); rule_criteria_require_rw(&criteria, @@ -4885,7 +4891,10 @@ delete_flows_start_loose(struct ofproto *ofproto, for (size_t i = 0; i < rules->n; i++) { struct rule *rule = rules->rules[i]; - CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = true; + cls_rule_make_invisible_in_version(CONST_CAST(struct cls_rule *, + &rule->cr), + CLS_MIN_VERSION, + CLS_MIN_VERSION); } } @@ -4897,9 +4906,7 @@ delete_flows_revert(struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { for (size_t i = 0; i < rules->n; i++) { - struct rule *rule = rules->rules[i]; - - CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = false; + cls_rule_restore_visibility(&rules->rules[i]->cr); } rule_collection_destroy(rules); } @@ -4925,7 +4932,7 @@ delete_flow_start_strict(struct ofproto *ofproto, enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, - fm->cookie, fm->cookie_mask, + CLS_MAX_VERSION, fm->cookie, fm->cookie_mask, fm->out_port, fm->out_group); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); @@ -4936,7 +4943,10 @@ delete_flow_start_strict(struct ofproto *ofproto, for (size_t i = 0; i < 
rules->n; i++) { struct rule *rule = rules->rules[i]; - CONST_CAST(struct cls_rule *, &rule->cr)->to_be_removed = true; + cls_rule_make_invisible_in_version(CONST_CAST(struct cls_rule *, + &rule->cr), + CLS_MIN_VERSION, + CLS_MIN_VERSION); } } @@ -5340,7 +5350,7 @@ ofproto_collect_ofmonitor_refresh_rules(const struct ofmonitor *m, const struct oftable *table; struct cls_rule target; - cls_rule_init_from_minimatch(&target, &m->match, 0); + cls_rule_init_from_minimatch(&target, &m->match, 0, CLS_MAX_VERSION); FOR_EACH_MATCHING_TABLE (table, m->table_id, ofproto) { struct rule *rule; @@ -5877,8 +5887,8 @@ group_get_ref_count(struct ofgroup *group) uint32_t count; match_init_catchall(&match); - rule_criteria_init(&criteria, 0xff, &match, 0, htonll(0), htonll(0), - OFPP_ANY, group->group_id); + rule_criteria_init(&criteria, 0xff, &match, 0, CLS_MAX_VERSION, htonll(0), + htonll(0), OFPP_ANY, group->group_id); ovs_mutex_lock(&ofproto_mutex); error = collect_rules_loose(ofproto, &criteria, &rules); ovs_mutex_unlock(&ofproto_mutex); @@ -6518,26 +6528,6 @@ do_bundle_flow_mod_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } } -/* Commit phases (all while locking ofproto_mutex): - * - * 1. Gather resources - do not send any events or notifications. - * - * add: Check conflicts, check for a displaced flow. If no displaced flow - * exists, add the new flow, but mark it as "invisible". - * mod: Collect affected flows, Do not modify yet. - * del: Collect affected flows, Do not delete yet. - * - * 2a. Fail if any errors are found. After this point no errors are possible. - * No visible changes were made, so rollback is minimal (remove added invisible - * flows, revert 'to_be_removed' status of flows). - * - * 2b. Commit the changes - * - * add: if have displaced flow, modify it, otherwise mark the new flow as - * "visible". - * mod: Modify the collected flows. - * del: Delete the collected flows. 
- */ static enum ofperr do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) { @@ -7434,7 +7424,7 @@ ofproto_get_vlan_usage(struct ofproto *ofproto, unsigned long int *vlan_bitmap) match_init_catchall(&match); match_set_vlan_vid_masked(&match, htons(VLAN_CFI), htons(VLAN_CFI)); - cls_rule_init(&target, &match, 0); + cls_rule_init(&target, &match, 0, CLS_MAX_VERSION); free(ofproto->vlan_bitmap); ofproto->vlan_bitmap = bitmap_allocate(4096); diff --git a/tests/test-classifier.c b/tests/test-classifier.c index a615438c0..24fc5eb11 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -433,7 +433,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) /* This assertion is here to suppress a GCC 4.9 array-bounds warning */ ovs_assert(cls->n_tries <= CLS_MAX_TRIES); - cr0 = classifier_lookup(cls, &flow, &wc); + cr0 = classifier_lookup(cls, CLS_MAX_VERSION, &flow, &wc); cr1 = tcls_lookup(tcls, &flow); assert((cr0 == NULL) == (cr1 == NULL)); if (cr0 != NULL) { @@ -443,7 +443,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) assert(cls_rule_equal(cr0, cr1)); assert(tr0->aux == tr1->aux); } - cr2 = classifier_lookup(cls, &flow, NULL); + cr2 = classifier_lookup(cls, CLS_MAX_VERSION, &flow, NULL); assert(cr2 == cr0); } } @@ -635,7 +635,7 @@ make_rule(int wc_fields, int priority, int value_pat) rule = xzalloc(sizeof *rule); cls_rule_init(&rule->cls_rule, &match, wc_fields ? (priority == INT_MIN ? 
priority + 1 : priority) - : INT_MAX); + : INT_MAX, CLS_MIN_VERSION); return rule; } diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 10d44e0ef..812ce7fff 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -2457,7 +2457,7 @@ fte_insert(struct classifier *cls, const struct match *match, struct fte *old, *fte; fte = xzalloc(sizeof *fte); - cls_rule_init(&fte->rule, match, priority); + cls_rule_init(&fte->rule, match, priority, CLS_MIN_VERSION); fte->versions[index] = version; old = fte_from_cls_rule(classifier_replace(cls, &fte->rule, NULL, 0)); @@ -2467,7 +2467,6 @@ fte_insert(struct classifier *cls, const struct match *match, ovsrcu_postpone(fte_free, old); } - cls_rule_make_visible(&fte->rule); } /* Reads the flows in 'filename' as flow table entries in 'cls' for the version From cbc083e330186a23c17a1bea284f39dfe9ffb2ad Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 5 Jun 2015 22:09:50 -0700 Subject: [PATCH 133/146] netdev-vport: Mark netdev_vport_get_dpif_port() as OVS_WARN_UNUSED_RESULT. Ignoring the result of this function means that the caller is quite likely blindly using the character array passed in, instead of the return value, which leads to latent bugs. This would have prevented one of the bugs fixed by commit "tunneling: Fix a tunnel name display bug". CC: Andy Zhou Signed-off-by: Ben Pfaff Acked-by: Andy Zhou --- lib/netdev-vport.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/netdev-vport.h b/lib/netdev-vport.h index b20c40716..be02cb569 100644 --- a/lib/netdev-vport.h +++ b/lib/netdev-vport.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011, 2013 Nicira, Inc. + * Copyright (c) 2010, 2011, 2013, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include +#include "compiler.h" struct dpif_netlink_vport; struct dpif_flow_stats; @@ -48,7 +49,8 @@ enum { NETDEV_VPORT_NAME_BUFSIZE = 16 }; enum { NETDEV_VPORT_NAME_BUFSIZE = 256 }; #endif const char *netdev_vport_get_dpif_port(const struct netdev *, - char namebuf[], size_t bufsize); + char namebuf[], size_t bufsize) + OVS_WARN_UNUSED_RESULT; char *netdev_vport_get_dpif_port_strdup(const struct netdev *); #endif /* netdev-vport.h */ From 21f217884710019b337c35ec434ae75689044340 Mon Sep 17 00:00:00 2001 From: Sorin Vinturis Date: Thu, 11 Jun 2015 12:52:49 +0000 Subject: [PATCH 134/146] datapath-windows: Correctly complete the original NBL with multiple NBs OvsCreateNewNBLsFromMultipleNBs function failed to correctly complete the original NBL with multiple NBs after creating multiple NBLs with single NB. Signed-off-by: Sorin Vinturis Acked-by: Eitan Eliahu Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- datapath-windows/ovsext/PacketIO.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index 7b2c0c81c..d41335b91 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -524,11 +524,12 @@ OvsCreateNewNBLsFromMultipleNBs(POVS_SWITCH_CONTEXT switchContext, } lastNbl->Next = *nextNbl; *nextNbl = newNbls->Next; - *curNbl = newNbls; - (*curNbl)->Next = NULL; OvsCompleteNBL(switchContext, *curNbl, TRUE); + *curNbl = newNbls; + (*curNbl)->Next = NULL; + error = FALSE; } while (error); From 022c20408192a6c35f8f629411b07c13250e9682 Mon Sep 17 00:00:00 2001 From: Eitan Eliahu Date: Thu, 11 Jun 2015 06:35:54 -0700 Subject: [PATCH 135/146] datapath-windows: Stateless TCP Tunnelling protocol - Initial implementation This change include an initial implementable of STT. 
The following should be added: [1] Checksum offload (SW and HW) [2] LSO (SW and HW) [3] IP layer WFP callout for IP segments Added support for multiple (per TCP port) STT ports Testing: link layer connection through ping works. File transfer. Signed-off-by: Eitan Eliahu Co-authored-by: Saurabh Shah Signed-off-by: Saurabh Shah Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- INSTALL.Windows.md | 36 +-- datapath-windows/automake.mk | 2 + datapath-windows/ovsext/Actions.c | 64 +++-- datapath-windows/ovsext/Debug.h | 1 + datapath-windows/ovsext/Stt.c | 369 +++++++++++++++++++++++++ datapath-windows/ovsext/Stt.h | 89 ++++++ datapath-windows/ovsext/Switch.h | 2 - datapath-windows/ovsext/Tunnel.c | 3 +- datapath-windows/ovsext/Util.h | 1 + datapath-windows/ovsext/Vport.c | 52 +++- datapath-windows/ovsext/Vport.h | 47 +++- datapath-windows/ovsext/Vxlan.c | 16 +- datapath-windows/ovsext/Vxlan.h | 9 +- datapath-windows/ovsext/ovsext.vcxproj | 4 +- 14 files changed, 632 insertions(+), 63 deletions(-) create mode 100644 datapath-windows/ovsext/Stt.c create mode 100644 datapath-windows/ovsext/Stt.h diff --git a/INSTALL.Windows.md b/INSTALL.Windows.md index 3171e47d7..6d870edd6 100644 --- a/INSTALL.Windows.md +++ b/INSTALL.Windows.md @@ -386,29 +386,31 @@ Hyper-Vs. The following examples demonstrate how it can be done: % ovs-vsctl add-port br-int ovs-port-a tag=900 % ovs-vsctl add-port br-int ovs-port-b tag=900 -Steps to add VXLAN tunnels +Steps to add tunnels -------------------------- -The Windows Open vSwitch implementation support VXLAN tunnels. To add VXLAN +The Windows Open vSwitch implementation support VXLAN and STT tunnels. To add tunnels, the following steps serve as examples. Note that, any patch ports created between br-int and br-pif MUST be beleted -prior to adding VXLAN tunnels. +prior to adding tunnels. 
-01> Add the vxlan port between 172.168.201.101 <-> 172.168.201.102 - % ovs-vsctl add-port br-int vxlan-1 - % ovs-vsctl set Interface vxlan-1 type=vxlan - % ovs-vsctl set Interface vxlan-1 options:local_ip=172.168.201.101 - % ovs-vsctl set Interface vxlan-1 options:remote_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-1 options:in_key=flow - % ovs-vsctl set Interface vxlan-1 options:out_key=flow +01> Add the tunnel port between 172.168.201.101 <-> 172.168.201.102 + % ovs-vsctl add-port br-int tun-1 + % ovs-vsctl set Interface tun-1 type=port-type + % ovs-vsctl set Interface tun-1 options:local_ip=172.168.201.101 + % ovs-vsctl set Interface tun-1 options:remote_ip=172.168.201.102 + % ovs-vsctl set Interface tun-1 options:in_key=flow + % ovs-vsctl set Interface tun-1 options:out_key=flow -02> Add the vxlan port between 172.168.201.101 <-> 172.168.201.105 - % ovs-vsctl add-port br-int vxlan-2 - % ovs-vsctl set Interface vxlan-2 type=vxlan - % ovs-vsctl set Interface vxlan-2 options:local_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-2 options:remote_ip=172.168.201.105 - % ovs-vsctl set Interface vxlan-2 options:in_key=flow - % ovs-vsctl set Interface vxlan-2 options:out_key=flow +02> Add the tunnel port between 172.168.201.101 <-> 172.168.201.105 + % ovs-vsctl add-port br-int tun-2 + % ovs-vsctl set Interface tun-2 type=port-type + % ovs-vsctl set Interface tun-2 options:local_ip=172.168.201.102 + % ovs-vsctl set Interface tun-2 options:remote_ip=172.168.201.105 + % ovs-vsctl set Interface tun-2 options:in_key=flow + % ovs-vsctl set Interface tun-2 options:out_key=flow + + Where port-type is the string stt or vxlan Requirements diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 9324b3c27..a4f5a576d 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -56,6 +56,8 @@ EXTRA_DIST += \ datapath-windows/ovsext/Vport.c \ datapath-windows/ovsext/Vport.h \ datapath-windows/ovsext/Vxlan.c \ + 
datapath-windows/ovsext/Stt.h \ + datapath-windows/ovsext/Stt.c \ datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 79e464c80..d75949ca4 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -23,6 +23,7 @@ #include "NetProto.h" #include "Flow.h" #include "Vxlan.h" +#include "Stt.h" #include "Checksum.h" #include "PacketIO.h" @@ -35,6 +36,8 @@ typedef struct _OVS_ACTION_STATS { UINT64 rxVxlan; UINT64 txVxlan; + UINT64 rxStt; + UINT64 txStt; UINT64 flowMiss; UINT64 flowUserspace; UINT64 txTcp; @@ -184,9 +187,6 @@ OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx, } /* - * XXX: When we search for the tunnelVport we also need to specify the - * tunnelling protocol or the L4 protocol as key as well, because there are - * different protocols that can use the same destination port. * -------------------------------------------------------------------------- * OvsDetectTunnelRxPkt -- * Utility function for an RX packet to detect its tunnel type. @@ -209,14 +209,27 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, flowKey->ipKey.nwProto == IPPROTO_UDP) { UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort); + dstPort, + OVS_VPORT_TYPE_VXLAN); + if (tunnelVport) { + ovsActionStats.rxVxlan++; + } + } else if (!flowKey->ipKey.nwFrag && + flowKey->ipKey.nwProto == IPPROTO_TCP) { + UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_STT); + if (tunnelVport) { + ovsActionStats.rxStt++; + } } + // We might get tunnel packets even before the tunnel gets initialized. 
if (tunnelVport) { ASSERT(ovsFwdCtx->tunnelRxNic == NULL); ovsFwdCtx->tunnelRxNic = tunnelVport; - ovsActionStats.rxVxlan++; return TRUE; } @@ -292,6 +305,14 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, /* Tunnel the packet only if tunnel context is set. */ if (ovsFwdCtx->tunKey.dst != 0) { + switch(dstVport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + ovsActionStats.txVxlan++; + break; + case OVS_VPORT_TYPE_STT: + ovsActionStats.txStt++; + break; + } ovsActionStats.txVxlan++; ovsFwdCtx->tunnelTxNic = dstVport; } @@ -616,10 +637,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) * Setup the source port to be the internal port to as to facilitate the * second OvsLookupFlow. */ - if (ovsFwdCtx->switchContext->internalVport == NULL) { + if (ovsFwdCtx->switchContext->internalVport == NULL || + ovsFwdCtx->switchContext->virtualExternalVport == NULL) { OvsClearTunTxCtx(ovsFwdCtx); OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped since internal port is absent"); + L"OVS-Dropped since either internal or external port is absent"); return NDIS_STATUS_FAILURE; } ovsFwdCtx->srcVportNo = @@ -634,9 +656,14 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) case OVS_VPORT_TYPE_VXLAN: status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, - (VOID *)ovsFwdCtx->completionList, &ovsFwdCtx->layers, &newNbl); break; + case OVS_VPORT_TYPE_STT: + status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, + ovsFwdCtx->switchContext, + &ovsFwdCtx->layers, &newNbl); + break; default: ASSERT(! "Tx: Unhandled tunnel type"); } @@ -692,14 +719,19 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) goto dropNbl; } + /* + * Decap port functions should return a new NBL if it was copied, and + * this new NBL should be setup as the ovsFwdCtx->curNbl. 
+ */ + switch(tunnelRxVport->ovsType) { case OVS_VPORT_TYPE_VXLAN: - /* - * OvsDoDecapVxlan should return a new NBL if it was copied, and - * this new NBL should be setup as the ovsFwdCtx->curNbl. - */ - status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - &ovsFwdCtx->tunKey, &newNbl); + status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; + case OVS_VPORT_TYPE_STT: + status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); break; default: OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", @@ -1318,13 +1350,11 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, case OVS_KEY_ATTR_TUNNEL: { OvsIPv4TunnelKey tunKey; - - status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); + status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); ASSERT(status == NDIS_STATUS_SUCCESS); tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); tunKey.dst_port = key->ipKey.l4.tpDst; RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); - break; } case OVS_KEY_ATTR_SKB_MARK: diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h index a0da5eba3..4b7b52651 100644 --- a/datapath-windows/ovsext/Debug.h +++ b/datapath-windows/ovsext/Debug.h @@ -40,6 +40,7 @@ #define OVS_DBG_OTHERS BIT32(21) #define OVS_DBG_NETLINK BIT32(22) #define OVS_DBG_TUNFLT BIT32(23) +#define OVS_DBG_STT BIT32(24) #define OVS_DBG_RESERVED BIT32(31) //Please add above OVS_DBG_RESERVED. diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c new file mode 100644 index 000000000..1f708c843 --- /dev/null +++ b/datapath-windows/ovsext/Stt.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "NetProto.h" +#include "Switch.h" +#include "Vport.h" +#include "Flow.h" +#include "Stt.h" +#include "IpHelper.h" +#include "Checksum.h" +#include "User.h" +#include "PacketIO.h" +#include "Flow.h" +#include "PacketParser.h" +#include "Atomic.h" +#include "Util.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_STT +#include "Debug.h" + +static NDIS_STATUS +OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl); + +/* + * -------------------------------------------------------------------------- + * OvsInitSttTunnel -- + * Initialize STT tunnel module. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 tcpDestPort) +{ + POVS_STT_VPORT sttPort; + + sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort), + OVS_STT_POOL_TAG); + if (!sttPort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(sttPort, sizeof(*sttPort)); + sttPort->dstPort = tcpDestPort; + vport->priv = (PVOID) sttPort; + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCleanupSttTunnel -- + * Cleanup STT Tunnel module. 
+ * -------------------------------------------------------------------------- + */ +void +OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVS_VPORT_TYPE_STT || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG); + vport->priv = NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsEncapStt -- + * Encapsulates a packet with an STT header. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + OVS_FWD_INFO fwdInfo; + NDIS_STATUS status; + + UNREFERENCED_PARAMETER(switchContext); + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + /* + * XXX This case where the ARP table is not populated is + * currently not handled + */ + return NDIS_STATUS_FAILURE; + } + + status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers, switchContext, + newNbl); + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDoEncapStt -- + * Internal utility function which actually does the STT encap. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PMDL curMdl = NULL; + PNET_BUFFER curNb; + PUINT8 buf = NULL; + EthHdr *outerEthHdr; + IPHdr *outerIpHdr; + TCPHdr *outerTcpHdr; + SttHdr *sttHdr; + UINT32 innerFrameLen, ipTotalLen; + POVS_STT_VPORT vportStt; + UINT32 headRoom = OvsGetSttTunHdrSize(); + UINT32 tcpChksumLen; + + UNREFERENCED_PARAMETER(layers); + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; + + lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + if (lsoInfo.LsoV1Transmit.MSS) { + /* XXX We don't handle LSO yet */ + OVS_LOG_ERROR("LSO on STT is not supported"); + return NDIS_STATUS_FAILURE; + } + } + + vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport); + ASSERT(vportStt); + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*copy NblInfo*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + /* NB Chain should be split before */ + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb); + /* + * External port can't be removed as we hold the dispatch lock + * We also check if the external port was removed beforecalling + * port encapsulation functions + */ + if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) { + OVS_LOG_ERROR("Packet too large (size %d, mtu %d). 
Can't encapsulate", + innerFrameLen, OvsGetExternalMtu(switchContext)); + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)"); + OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)"); + goto ret_error; + } + + /* + * Make sure that the headroom for the tunnel header is continguous in + * memory. + */ + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) + >= (int) headRoom); + + buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!buf) { + ASSERT(!"MmGetSystemAddressForMdlSafe failed"); + OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed"); + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + outerEthHdr = (EthHdr *)buf; + outerIpHdr = (IPHdr *) (outerEthHdr + 1); + outerTcpHdr = (TCPHdr *) (outerIpHdr + 1); + sttHdr = (SttHdr *) (outerTcpHdr + 1); + + /* L2 header */ + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr, + sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source); + outerEthHdr->Type = htons(ETH_TYPE_IPV4); + + /* L3 header */ + outerIpHdr->ihl = sizeof(IPHdr) >> 2; + outerIpHdr->version = IPPROTO_IPV4; + outerIpHdr->tos = tunKey->tos; + + ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + outerIpHdr->tot_len = htons(ipTotalLen); + ASSERT(ipTotalLen < 65536); + + outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen); + outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + outerIpHdr->ttl = tunKey->ttl? 
tunKey->ttl : 64; + outerIpHdr->protocol = IPPROTO_TCP; + outerIpHdr->check = 0; + outerIpHdr->saddr = fwdInfo->srcIpAddr; + outerIpHdr->daddr = tunKey->dst; + outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0); + + /* L4 header */ + RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); + outerTcpHdr->source = htons(tunKey->flow_hash | 32768); + outerTcpHdr->dest = htons(vportStt->dstPort); + outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) << + STT_SEQ_LEN_SHIFT); + outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo)); + outerTcpHdr->doff = sizeof(TCPHdr) >> 2; + outerTcpHdr->psh = 1; + outerTcpHdr->ack = 1; + outerTcpHdr->window = (uint16) ~0; + + /* Calculate pseudo header chksum */ + tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + ASSERT(tcpChksumLen < 65535); + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst, + IPPROTO_TCP, (uint16) tcpChksumLen); + sttHdr->version = 0; + + /* XXX need to peek into the inner packet, hard code for now */ + sttHdr->flags = STT_PROTO_IPV4; + sttHdr->l4Offset = 0; + + sttHdr->reserved = 0; + /* XXX Used for large TCP packets.Not sure how it is used, clarify */ + sttHdr->mss = 0; + sttHdr->vlanTCI = 0; + sttHdr->key = tunKey->tunnelId; + /* Zero out stt padding */ + *(uint16 *)(sttHdr + 1) = 0; + + /* Calculate software tcp checksum */ + outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, + sizeof(EthHdr) + sizeof(IPHdr)); + if (outerTcpHdr->check == 0) { + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDecapStt -- + * Decapsulates an STT packet. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + PNET_BUFFER curNb; + IPHdr *ipHdr; + char *ipBuf[sizeof(IPHdr)]; + SttHdr *sttHdr; + char *sttBuf[STT_HDR_LEN]; + UINT32 advanceCnt, hdrLen; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) { + OVS_LOG_ERROR("Packet length received is less than the tunnel header:" + " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize()); + return NDIS_STATUS_INVALID_LENGTH; + } + + /* Skip Eth header */ + hdrLen = sizeof(EthHdr); + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt = hdrLen; + + ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf, + 1 /*no align*/, 0); + ASSERT(ipHdr); + + /* Skip IP & TCP headers */ + hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + /* STT Header */ + sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf, + 1 /*no align*/, 0); + ASSERT(sttHdr); + + /* Initialize the tunnel key */ + tunKey->dst = ipHdr->daddr; + tunKey->src = ipHdr->saddr; + tunKey->tunnelId = sttHdr->key; + tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY); + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + + /* Skip stt header, DataOffset points to inner pkt now. 
*/ + hdrLen = STT_HDR_LEN; + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, + 0, FALSE /*copy NBL info*/); + + ASSERT(advanceCnt == OvsGetSttTunHdrSize()); + status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL); + + if (*newNbl == NULL) { + OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL"); + status = NDIS_STATUS_RESOURCES; + } + + return status; +} diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h new file mode 100644 index 000000000..38d721c49 --- /dev/null +++ b/datapath-windows/ovsext/Stt.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_STT_H_ +#define __OVS_STT_H_ 1 + +#define STT_TCP_PORT 7471 +#define STT_TCP_PORT_NBO 0x2f1d + +#define MAX_IP_TOTAL_LEN 65535 + +// STT defines. 
+#define STT_SEQ_LEN_SHIFT 16 +#define STT_SEQ_OFFSET_MASK ((1 << STT_SEQ_LEN_SHIFT) - 1) +#define STT_FRAME_LEN(seq) ((seq) >> STT_SEQ_LEN_SHIFT) +#define STT_SEGMENT_OFF(seq) ((seq) & STT_SEQ_OFFSET_MASK) + +#define STT_CSUM_VERIFIED (1 << 0) +#define STT_CSUM_PARTIAL (1 << 1) +#define STT_PROTO_IPV4 (1 << 2) +#define STT_PROTO_TCP (1 << 3) +#define STT_PROTO_TYPES (STT_PROTO_IPV4 | STT_PROTO_TCP) + +#define STT_ETH_PAD 2 +typedef struct SttHdr { + UINT8 version; + UINT8 flags; + UINT8 l4Offset; + UINT8 reserved; + UINT16 mss; + UINT16 vlanTCI; + UINT64 key; +} SttHdr, *PSttHdr; + +#define STT_HDR_LEN (sizeof(SttHdr) + STT_ETH_PAD) + +typedef struct _OVS_STT_VPORT { + UINT16 dstPort; + UINT64 ackNo; + UINT64 ipId; + + UINT64 inPkts; + UINT64 outPkts; + UINT64 slowInPkts; + UINT64 slowOutPkts; +} OVS_STT_VPORT, *POVS_STT_VPORT; + +NTSTATUS OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort); + +VOID OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + + +void OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + +NDIS_STATUS OvsEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl); + + +NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); + +static __inline UINT32 +OvsGetSttTunHdrSize(VOID) +{ + return sizeof (EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr) + + STT_HDR_LEN; +} + +#endif /*__OVS_STT_H_ */ diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h index 8e1eb5f2c..3bc20eee4 100644 --- a/datapath-windows/ovsext/Switch.h +++ b/datapath-windows/ovsext/Switch.h @@ -218,6 +218,4 @@ OvsAcquireSwitchContext(VOID); VOID OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext); -PVOID OvsGetExternalVport(); - #endif /* __SWITCH_H_ */ diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index 
002f18024..ffb9c011d 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -286,7 +286,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; vport = OvsFindTunnelVportByDstPort(gOvsSwitchContext, - htons(tunnelKey.dst_port)); + htons(tunnelKey.dst_port), + OVS_VPORT_TYPE_VXLAN); if (vport == NULL){ status = STATUS_UNSUCCESSFUL; diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h index 9a0124245..ee676fa71 100644 --- a/datapath-windows/ovsext/Util.h +++ b/datapath-windows/ovsext/Util.h @@ -33,6 +33,7 @@ #define OVS_SWITCH_POOL_TAG 'SSVO' #define OVS_USER_POOL_TAG 'USVO' #define OVS_VPORT_POOL_TAG 'PSVO' +#define OVS_STT_POOL_TAG 'TSVO' VOID *OvsAllocateMemory(size_t size); VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag); diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 5a1b64f18..913954561 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -21,6 +21,7 @@ #include "Event.h" #include "User.h" #include "Vxlan.h" +#include "Stt.h" #include "IpHelper.h" #include "Oid.h" #include "Datapath.h" @@ -602,7 +603,8 @@ OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, - UINT16 dstPort) + UINT16 dstPort, + OVS_VPORT_TYPE ovsPortType) { POVS_VPORT_ENTRY vport; PLIST_ENTRY head, link; @@ -611,7 +613,8 @@ OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]); LIST_FORALL(head, link) { vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink); - if (((POVS_VXLAN_VPORT)vport->priv)->dstPort == dstPort) { + if (GetPortFromPriv(vport) == dstPort && + vport->ovsType == ovsPortType) { return vport; } } @@ -934,6 +937,9 @@ OvsInitTunnelVport(PVOID userContext, (PVOID)tunnelContext); break; } + case OVS_VPORT_TYPE_STT: + status = 
OvsInitSttTunnel(vport, dstPort); + break; default: ASSERT(0); } @@ -1079,10 +1085,11 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, switch(vport->ovsType) { case OVS_VPORT_TYPE_VXLAN: + case OVS_VPORT_TYPE_STT: { - POVS_VXLAN_VPORT vxlanVport = (POVS_VXLAN_VPORT)vport->priv; - hash = OvsJhashBytes(&vxlanVport->dstPort, - sizeof(vxlanVport->dstPort), + UINT16 dstPort = GetPortFromPriv(vport); + hash = OvsJhashBytes(&dstPort, + sizeof(dstPort), OVS_HASH_BASIS); InsertHeadList( &gOvsSwitchContext->tunnelVportsArray[hash & OVS_VPORT_MASK], @@ -1158,7 +1165,8 @@ OvsCleanupVportCommon(POVS_SWITCH_CONTEXT switchContext, InitializeListHead(&vport->ovsNameLink); RemoveEntryList(&vport->portNoLink); InitializeListHead(&vport->portNoLink); - if (OVS_VPORT_TYPE_VXLAN == vport->ovsType) { + if (OVS_VPORT_TYPE_VXLAN == vport->ovsType || + OVS_VPORT_TYPE_STT == vport->ovsType) { RemoveEntryList(&vport->tunnelVportLink); InitializeListHead(&vport->tunnelVportLink); } @@ -1258,6 +1266,9 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, tunnelContext); break; } + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport); + break; case OVS_VPORT_TYPE_GRE: case OVS_VPORT_TYPE_GRE64: break; @@ -2147,17 +2158,29 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, vportAllocated = TRUE; if (OvsIsTunnelVportType(portType)) { - UINT16 udpPortDest = VXLAN_UDP_PORT; + UINT16 transportPortDest = 0; + + switch (vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + transportPortDest = VXLAN_UDP_PORT; + break; + case OVS_VPORT_TYPE_STT: + transportPortDest = STT_TCP_PORT; + break; + default: + break; + } + PNL_ATTR attr = NlAttrFindNested(vportAttrs[OVS_VPORT_ATTR_OPTIONS], OVS_TUNNEL_ATTR_DST_PORT); if (attr) { - udpPortDest = NlAttrGetU16(attr); + transportPortDest = NlAttrGetU16(attr); } status = OvsInitTunnelVport(usrParamsCtx, vport, portType, - udpPortDest); + transportPortDest); nlError = NlMapStatusToNlErr(status); } else { @@ -2243,7 +2266,16 @@ Cleanup: if (vport && 
vportAllocated == TRUE) { if (vportInitialized == TRUE) { if (OvsIsTunnelVportType(portType)) { - OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); + switch (vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); + break; + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport);; + break; + default: + ASSERT(!"Invalid tunnel port type"); + } } } OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 84ac3d3fa..3ea3d0303 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -18,6 +18,8 @@ #define __VPORT_H_ 1 #include "Switch.h" +#include "VxLan.h" +#include "Stt.h" #define OVS_MAX_DPPORTS MAXUINT16 #define OVS_DPPORT_NUMBER_INVALID OVS_MAX_DPPORTS @@ -147,7 +149,8 @@ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchConte NDIS_SWITCH_PORT_ID portId, NDIS_SWITCH_NIC_INDEX index); POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, - UINT16 dstPort); + UINT16 dstPort, + OVS_VPORT_TYPE ovsVportType); NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); @@ -177,10 +180,18 @@ static __inline BOOLEAN OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) { return ovsType == OVS_VPORT_TYPE_VXLAN || + ovsType == OVS_VPORT_TYPE_STT || ovsType == OVS_VPORT_TYPE_GRE || ovsType == OVS_VPORT_TYPE_GRE64; } + +static __inline PVOID +GetOvsVportPriv(POVS_VPORT_ENTRY ovsVport) +{ + return ovsVport->priv; +} + static __inline BOOLEAN OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) { @@ -200,6 +211,40 @@ NTSTATUS OvsRemoveAndDeleteVport(PVOID usrParamsCtx, POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport, BOOLEAN hvDelete, BOOLEAN ovsDelete); +static __inline POVS_VPORT_ENTRY +OvsGetExternalVport(POVS_SWITCH_CONTEXT switchContext) +{ + return 
switchContext->virtualExternalVport; +} + +static __inline UINT32 +OvsGetExternalMtu(POVS_SWITCH_CONTEXT switchContext) +{ + ASSERT(OvsGetExternalVport(switchContext)); + return ((POVS_VPORT_ENTRY) OvsGetExternalVport(switchContext))->mtu; +} + +static __inline UINT16 +GetPortFromPriv(POVS_VPORT_ENTRY vport) +{ + UINT16 dstPort = 0; + PVOID vportPriv = GetOvsVportPriv(vport); + + /* XXX: it would be better to have a common tunnel "parent" structure */ + ASSERT(vportPriv); + switch(vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + break; + case OVS_VPORT_TYPE_STT: + dstPort = ((POVS_STT_VPORT)vportPriv)->dstPort; + break; + default: + ASSERT(! "Port is not a tunnel port"); + } + ASSERT(dstPort); + return dstPort; +} NDIS_STATUS InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport); diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 9935bdff0..fa6be666b 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -244,10 +244,10 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, /* L2 header */ ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination + sizeof ethHdr->Source); ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); ethHdr->Type = htons(ETH_TYPE_IPV4); // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, @@ -311,13 +311,11 @@ NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl) { NTSTATUS status; OVS_FWD_INFO fwdInfo; - UNREFERENCED_PARAMETER(completionList); status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { @@ -420,15 +418,15
@@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, /* *---------------------------------------------------------------------------- - * OvsDoDecapVxlan + * OvsDecapVxlan * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. *---------------------------------------------------------------------------- */ NDIS_STATUS -OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl) +OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; PMDL curMdl; diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index 0e2830496..448ee9722 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -65,14 +65,13 @@ NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl); -NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl); +NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); static __inline UINT32 OvsGetVxlanTunHdrSize(VOID) diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 693bc50d1..705001592 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -90,6 +90,7 @@ + @@ -183,6 +184,7 @@ Create $(IntDir)\precomp.h.pch + @@ -202,4 +204,4 @@ - + \ No newline at end of file From d322ffc81fb1fe0b8a48f7dcacfb855b511baa5f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 11 Jun 2015 10:47:47 -0700 Subject: [PATCH 136/146] ovsdb-idl: Document that the IDL always presents a consistent 
view. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've had this question a couple of times so we might as well document it. Requested-by: Saurabh Shrivastava (सौरभ श्रीवास्तव) Signed-off-by: Ben Pfaff --- AUTHORS | 1 + lib/ovsdb-idl.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 24e854de7..936742fe9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -332,6 +332,7 @@ Ronaldo A. Ferreira ronaldof@CS.Princeton.EDU Ronny L. Bull bullrl@clarkson.edu Sander Eikelenboom linux@eikelenboom.it Saul St. John sstjohn@cs.wisc.edu +Saurabh Shrivastava (सौरभ श्रीवास्तव) saurabh@gmail.com Scott Hendricks shendricks@nicira.com Sean Brady sbrady@gtfservices.com Sebastian Andrzej Siewior sebastian@breakpoint.cc diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index 54df90de8..a49f84fb6 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. +/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,10 @@ * raw JSON into data structures that are easier for clients to digest. Most * notably, references to rows via UUID become C pointers. * + * The IDL always presents a consistent snapshot of the database to its client, + * that is, it won't present the effects of some part of a transaction applied + * at the database server without presenting all of its effects. + * * The IDL also assists with issuing database transactions. The client creates * a transaction, manipulates the IDL data structures, and commits or aborts * the transaction. 
The IDL then composes and issues the necessary JSON-RPC From 48954dab23eecfe895d7cb34c26587f400297618 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 9 Jun 2015 10:29:43 -0700 Subject: [PATCH 137/146] odp-util: Remove last use of odp_tun_key_from_attr for formatting. We formerly converted tunnel attributes to their flow representation before formatting but now perform all operations directly on the netlink attributes. There is one remaining use of odp_tun_key_from_attr() that is not used for the purposes of generating a flow. This is to check the mask but this no longer makes sense given the way that we format the flow itself. In fact, the code is not actually invoked any more, so we can simply remove it. This retains the special case for tunnels as a safety measure but it should not matter in practice. Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- lib/odp-util.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index f99683a24..76dc44bae 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1520,18 +1520,7 @@ odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size) && ipv6_mask_is_exact((const struct in6_addr *)ipv6_mask->ipv6_dst); } if (attr == OVS_KEY_ATTR_TUNNEL) { - const struct flow_tnl *tun_mask = mask; - - return tun_mask->flags == FLOW_TNL_F_MASK - && tun_mask->tun_id == OVS_BE64_MAX - && tun_mask->ip_src == OVS_BE32_MAX - && tun_mask->ip_dst == OVS_BE32_MAX - && tun_mask->ip_tos == UINT8_MAX - && tun_mask->ip_ttl == UINT8_MAX - && tun_mask->tp_src == OVS_BE16_MAX - && tun_mask->tp_dst == OVS_BE16_MAX - && tun_mask->gbp_id == OVS_BE16_MAX - && tun_mask->gbp_flags == UINT8_MAX; + return false; } if (attr == OVS_KEY_ATTR_ARP) { @@ -1548,16 +1537,12 @@ odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size) static bool odp_mask_attr_is_exact(const struct nlattr *ma) { - struct flow_tnl tun_mask; enum ovs_key_attr attr = nl_attr_type(ma); const void 
*mask; size_t size; if (attr == OVS_KEY_ATTR_TUNNEL) { - memset(&tun_mask, 0, sizeof tun_mask); - odp_tun_key_from_attr(ma, &tun_mask); - mask = &tun_mask; - size = sizeof tun_mask; + return false; } else { mask = nl_attr_get(ma); size = nl_attr_get_size(ma); From 8f8023b3eee4f1b633f16fa9ea31a14eac2445f8 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:42 -0700 Subject: [PATCH 138/146] classifier: Make traversing identical rules robust. The traversal of the list of identical rules from the lookup threads is fragile if the list head is removed during the list traversal. This patch simplifies the implementation of that list by making the list a NULL-terminated, singly linked, RCU-protected list. By having the NULL at the end there is no longer a possibility of missing the point when the list wraps around. This is significant when there can be multiple elements with the same priority in the list. This change also decreases the size of the struct cls_match back to its pre-'visibility' attribute size. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- lib/classifier-private.h | 91 ++++++++++++++++++++++++++- lib/classifier.c | 133 +++++++++++++++------------------------ tests/test-classifier.c | 2 +- 3 files changed, 141 insertions(+), 85 deletions(-) diff --git a/lib/classifier-private.h b/lib/classifier-private.h index 2703b75a7..a54037137 100644 --- a/lib/classifier-private.h +++ b/lib/classifier-private.h @@ -65,10 +65,17 @@ struct cls_partition { struct tag_tracker tracker; /* Tracks the bits in 'tags'. */ }; -/* Internal representation of a rule in a "struct cls_subtable". */ +/* Internal representation of a rule in a "struct cls_subtable". + * + * The 'next' member is an element in a singly linked, null-terminated list. + * This list links together identical "cls_match"es in order of decreasing + * priority. The classifier code maintains the invariant that at most one rule
+ */ struct cls_match { /* Accessed by everybody. */ - struct rculist list; /* Identical, lower-priority "cls_match"es. */ + OVSRCU_TYPE(struct cls_match *) next; /* Equal, lower-priority matches. */ + OVSRCU_TYPE(struct cls_conjunction_set *) conj_set; /* Accessed only by writers. */ struct cls_partition *partition; @@ -95,11 +102,13 @@ struct cls_match { atomic_llong visibility; const struct cls_rule *cls_rule; - OVSRCU_TYPE(struct cls_conjunction_set *) conj_set; const struct miniflow flow; /* Matching rule. Mask is in the subtable. */ /* 'flow' must be the last field. */ }; +/* Must be RCU postponed. */ +void cls_match_free_cb(struct cls_match *); + static inline void cls_match_set_visibility(struct cls_match *rule, long long version) { @@ -136,6 +145,82 @@ cls_match_is_eventually_invisible(const struct cls_match *rule) return visibility <= 0; } + +/* cls_match 'next' */ + +static inline const struct cls_match * +cls_match_next(const struct cls_match *rule) +{ + return ovsrcu_get(struct cls_match *, &rule->next); +} + +static inline struct cls_match * +cls_match_next_protected(const struct cls_match *rule) +{ + return ovsrcu_get_protected(struct cls_match *, &rule->next); +} + +/* Puts 'rule' in the position between 'prev' and 'next'. If 'prev' == NULL, + * then the 'rule' is the new list head, and if 'next' == NULL, the rule is the + * new list tail. + * If there are any nodes between 'prev' and 'next', they are dropped from the + * list. */ +static inline void +cls_match_insert(struct cls_match *prev, struct cls_match *next, + struct cls_match *rule) +{ + ovsrcu_set_hidden(&rule->next, next); + + if (prev) { + ovsrcu_set(&prev->next, rule); + } +} + +/* Puts 'new_rule' in the position of 'old_rule', which is the next node after + * 'prev'. If 'prev' == NULL, then the 'new_rule' is the new list head. + * + * The replaced cls_match still links to the later rules, and may still be + * referenced by other threads until all other threads quiesce. 
The replaced + * rule may not be re-inserted, re-initialized, or deleted until after all + * other threads have quiesced (use ovsrcu_postpone). */ +static inline void +cls_match_replace(struct cls_match *prev, + struct cls_match *old_rule, struct cls_match *new_rule) +{ + cls_match_insert(prev, cls_match_next_protected(old_rule), new_rule); +} + +/* Removes 'rule' following 'prev' from the list. If 'prev' is NULL, then the + * 'rule' is a list head, and the caller is responsible for maintaining its + * list head pointer (if any). + * + * Afterward, the removed rule is not linked to any more, but still links to + * the following rules, and may still be referenced by other threads until all + * other threads quiesce. The removed rule may not be re-inserted, + * re-initialized, or deleted until after all other threads have quiesced (use + * ovsrcu_postpone). + */ +static inline void +cls_match_remove(struct cls_match *prev, struct cls_match *rule) +{ + if (prev) { + ovsrcu_set(&prev->next, cls_match_next_protected(rule)); + } +} + +#define CLS_MATCH_FOR_EACH(ITER, HEAD) \ + for ((ITER) = (HEAD); (ITER); (ITER) = cls_match_next(ITER)) + +#define CLS_MATCH_FOR_EACH_AFTER_HEAD(ITER, HEAD) \ + CLS_MATCH_FOR_EACH(ITER, cls_match_next(HEAD)) + +/* Iterate cls_matches keeping the previous pointer for modifications. */ +#define FOR_EACH_RULE_IN_LIST_PROTECTED(ITER, PREV, HEAD) \ + for ((PREV) = NULL, (ITER) = (HEAD); \ + (ITER); \ + (PREV) = (ITER), (ITER) = cls_match_next_protected(ITER)) + + /* A longest-prefix match tree. */ struct trie_node { uint32_t prefix; /* Prefix bits for this node, MSB first. 
*/ diff --git a/lib/classifier.c b/lib/classifier.c index 2b2d3f646..50bbbbf3f 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -96,7 +96,7 @@ cls_match_alloc(const struct cls_rule *rule, = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values + MINIFLOW_VALUES_SIZE(count)); - rculist_init(&cls_match->list); + ovsrcu_init(&cls_match->next, NULL); *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule; *CONST_CAST(int *, &cls_match->priority) = rule->priority; atomic_init(&cls_match->visibility, 0); /* Initially invisible. */ @@ -123,66 +123,24 @@ static const struct cls_match *find_match_wc(const struct cls_subtable *, static struct cls_match *find_equal(const struct cls_subtable *, const struct miniflow *, uint32_t hash); -static inline const struct cls_match * -next_rule_in_list__(const struct cls_match *rule) -{ - const struct cls_match *next = NULL; - next = OBJECT_CONTAINING(rculist_next(&rule->list), next, list); - return next; -} - -static inline const struct cls_match * -next_rule_in_list(const struct cls_match *rule, const struct cls_match *head) -{ - const struct cls_match *next = next_rule_in_list__(rule); - return next != head ? next : NULL; -} - -/* Return the next lower-priority rule in the list that is visible. Multiple - * identical rules with the same priority may exist transitionally. In that - * case the first rule of a given priority has been marked as visible in one - * version and the later rules are marked as visible on the other version. - * This makes it possible to for the head and tail of the list have the same - * priority. */ +/* Return the next visible (lower-priority) rule in the list. Multiple + * identical rules with the same priority may exist transitionally, but when + * versioning is used at most one of them is ever visible for lookups on any + * given 'version'. 
*/ static inline const struct cls_match * next_visible_rule_in_list(const struct cls_match *rule, long long version) { - const struct cls_match *next = rule; - do { - next = next_rule_in_list__(next); - if (next->priority > rule->priority || next == rule) { + rule = cls_match_next(rule); + if (!rule) { /* We have reached the head of the list, stop. */ - return NULL; + break; } - } while (!cls_match_visible_in_version(next, version)); + } while (!cls_match_visible_in_version(rule, version)); - return next; + return rule; } -static inline struct cls_match * -next_rule_in_list_protected__(struct cls_match *rule) -{ - struct cls_match *next = NULL; - next = OBJECT_CONTAINING(rculist_next_protected(&rule->list), next, list); - return next; -} - -static inline struct cls_match * -next_rule_in_list_protected(struct cls_match *rule, struct cls_match *head) -{ - struct cls_match *next = next_rule_in_list_protected__(rule); - return next != head ? next : NULL; -} - -/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ -#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ - for ((RULE) = (HEAD); (RULE) != NULL; \ - (RULE) = next_rule_in_list(RULE, HEAD)) -#define FOR_EACH_RULE_IN_LIST_PROTECTED(RULE, HEAD) \ - for ((RULE) = (HEAD); (RULE) != NULL; \ - (RULE) = next_rule_in_list_protected(RULE, HEAD)) - static unsigned int minimask_get_prefix_len(const struct minimask *, const struct mf_field *); static void trie_init(struct classifier *cls, int trie_idx, @@ -720,12 +678,12 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, } n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash); } else { /* Equal rules exist in the classifier already. */ - struct cls_match *iter; + struct cls_match *prev, *iter; /* Scan the list for the insertion point that will keep the list in * order of decreasing priority. Insert after rules marked invisible * in any version of the same priority. 
*/ - FOR_EACH_RULE_IN_LIST_PROTECTED (iter, head) { + FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) { if (rule->priority > iter->priority || (rule->priority == iter->priority && !cls_match_is_eventually_invisible(iter))) { @@ -733,15 +691,16 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, } } - /* 'iter' now at the insertion point or NULL if at end. */ + /* Replace 'iter' with 'new' or insert 'new' between 'prev' and + * 'iter'. */ if (iter) { struct cls_rule *old; if (rule->priority == iter->priority) { - rculist_replace(&new->list, &iter->list); + cls_match_replace(prev, iter, new); old = CONST_CAST(struct cls_rule *, iter->cls_rule); } else { - rculist_insert(&iter->list, &new->list); + cls_match_insert(prev, iter, new); old = NULL; } @@ -761,7 +720,7 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, ovsrcu_postpone(free, conj_set); } - ovsrcu_postpone(free, iter); + ovsrcu_postpone(cls_match_free_cb, iter); old->cls_match = NULL; /* No change in subtable's max priority or max count. */ @@ -778,7 +737,8 @@ classifier_replace(struct classifier *cls, const struct cls_rule *rule, return old; } } else { - rculist_push_back(&head->list, &new->list); + /* 'new' is new node after 'prev' */ + cls_match_insert(prev, iter, new); } } @@ -843,7 +803,7 @@ classifier_insert(struct classifier *cls, const struct cls_rule *rule, const struct cls_rule * classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule) { - struct cls_match *rule, *prev, *next; + struct cls_match *rule, *prev, *next, *head; struct cls_partition *partition; struct cls_conjunction_set *conj_set; struct cls_subtable *subtable; @@ -862,19 +822,6 @@ classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule) /* Remove 'cls_rule' from the subtable's rules list. 
*/ rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node)); - INIT_CONTAINER(prev, rculist_back_protected(&rule->list), list); - INIT_CONTAINER(next, rculist_next(&rule->list), list); - - /* Remove from the list of equal rules. */ - rculist_remove(&rule->list); - - /* Cheap check for a non-head rule. */ - if (prev->priority > rule->priority) { - /* Not the highest priority rule, no need to check subtable's - * 'max_priority'. */ - goto free; - } - subtable = find_subtable(cls, &cls_rule->match.mask); ovs_assert(subtable); @@ -886,16 +833,30 @@ classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule) hash = minimatch_hash_range(&cls_rule->match, prev_be64ofs, FLOW_U64S, &basis); + head = find_equal(subtable, &cls_rule->match.flow, hash); + /* Check if the rule is not the head rule. */ - if (rule != prev && - rule != find_equal(subtable, &cls_rule->match.flow, hash)) { + if (rule != head) { + struct cls_match *iter; + /* Not the head rule, but potentially one with the same priority. */ + /* Remove from the list of equal rules. */ + FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) { + if (rule == iter) { + break; + } + } + ovs_assert(iter == rule); + + cls_match_remove(prev, rule); + goto check_priority; } /* 'rule' is the head rule. Check if there is another rule to * replace 'rule' in the data structures. */ - if (next != rule) { + next = cls_match_next_protected(rule); + if (next) { subtable_replace_head_rule(cls, subtable, rule, next, hash, ihash); goto check_priority; } @@ -959,13 +920,13 @@ check_priority: pvector_publish(&cls->subtables); } -free: + /* free the rule. 
*/ conj_set = ovsrcu_get_protected(struct cls_conjunction_set *, &rule->conj_set); if (conj_set) { ovsrcu_postpone(free, conj_set); } - ovsrcu_postpone(free, rule); + ovsrcu_postpone(cls_match_free_cb, rule); cls->n_rules--; return cls_rule; @@ -1364,7 +1325,7 @@ classifier_find_rule_exactly(const struct classifier *cls, if (!head) { return NULL; } - FOR_EACH_RULE_IN_LIST (rule, head) { + CLS_MATCH_FOR_EACH (rule, head) { if (rule->priority < target->priority) { break; /* Not found. */ } @@ -1788,7 +1749,7 @@ find_match(const struct cls_subtable *subtable, long long version, &subtable->mask, flow))) { /* Return highest priority rule that is visible. */ - FOR_EACH_RULE_IN_LIST(rule, head) { + CLS_MATCH_FOR_EACH (rule, head) { if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) { return rule; } @@ -1897,7 +1858,7 @@ find_match_wc(const struct cls_subtable *subtable, long long version, if (miniflow_and_mask_matches_flow_wc(&head->flow, &subtable->mask, flow, wc)) { /* Return highest priority rule that is visible. */ - FOR_EACH_RULE_IN_LIST(rule, head) { + CLS_MATCH_FOR_EACH (rule, head) { if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) { return rule; @@ -2399,3 +2360,13 @@ trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen) * that actually exist in the classifier are ever removed. 
*/ VLOG_WARN("Trying to remove non-existing rule from a prefix trie."); } + + +#define CLS_MATCH_POISON (struct cls_match *)(UINTPTR_MAX / 0xf * 0xb) + +void +cls_match_free_cb(struct cls_match *rule) +{ + ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON); + free(rule); +} diff --git a/tests/test-classifier.c b/tests/test-classifier.c index 24fc5eb11..cb65533ba 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -560,7 +560,7 @@ check_tables(const struct classifier *cls, int n_tables, int n_rules, } found_rules++; - RCULIST_FOR_EACH (rule, list, &head->list) { + CLS_MATCH_FOR_EACH_AFTER_HEAD (rule, head) { assert(rule->priority < prev_priority); assert(rule->priority <= table->max_priority); From 3bbe9a1fdade79e94c0f4880cc5c17eee64df79e Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:43 -0700 Subject: [PATCH 139/146] test-classifier: Test versioning features. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- tests/classifier.at | 8 +- tests/test-classifier.c | 301 +++++++++++++++++++++++++++++++--------- 2 files changed, 245 insertions(+), 64 deletions(-) diff --git a/tests/classifier.at b/tests/classifier.at index cfa1bc79d..1e75123c5 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -6,11 +6,15 @@ m4_foreach( [single-rule], [rule-replacement], [many-rules-in-one-list], + [versioned many-rules-in-one-list], [many-rules-in-one-table], + [versioned many-rules-in-one-table], [many-rules-in-two-tables], - [many-rules-in-five-tables]], + [versioned many-rules-in-two-tables], + [many-rules-in-five-tables], + [versioned many-rules-in-five-tables]], [AT_SETUP([flow classifier - m4_bpatsubst(testname, [-], [ ])]) - AT_CHECK([ovstest test-classifier testname], [0], [], []) + AT_CHECK([ovstest test-classifier m4_bpatsubst(testname, [versioned], [--versioned])], [0], [], []) AT_CLEANUP])]) AT_BANNER([miniflow unit tests]) diff --git a/tests/test-classifier.c b/tests/test-classifier.c index cb65533ba..9e837c369 
100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -42,6 +42,8 @@ #include "unaligned.h" #include "util.h" +static bool versioned = false; + /* Fields in a rule. */ #define CLS_FIELDS \ /* struct flow all-caps */ \ @@ -88,6 +90,7 @@ static const struct cls_field cls_fields[CLS_N_FIELDS] = { }; struct test_rule { + struct ovs_list list_node; int aux; /* Auxiliary data. */ struct cls_rule cls_rule; /* Classifier rule data. */ }; @@ -107,7 +110,8 @@ test_rule_destroy(struct test_rule *rule) } } -static struct test_rule *make_rule(int wc_fields, int priority, int value_pat); +static struct test_rule *make_rule(int wc_fields, int priority, int value_pat, + long long version); static void free_rule(struct test_rule *); static struct test_rule *clone_rule(const struct test_rule *); @@ -398,12 +402,13 @@ get_value(unsigned int *x, unsigned n_values) } static void -compare_classifiers(struct classifier *cls, struct tcls *tcls) +compare_classifiers(struct classifier *cls, size_t n_invisible_rules, + long long version, struct tcls *tcls) { static const int confidence = 500; unsigned int i; - assert(classifier_count(cls) == tcls->n_rules); + assert(classifier_count(cls) == tcls->n_rules + n_invisible_rules); for (i = 0; i < confidence; i++) { const struct cls_rule *cr0, *cr1, *cr2; struct flow flow; @@ -433,7 +438,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) /* This assertion is here to suppress a GCC 4.9 array-bounds warning */ ovs_assert(cls->n_tries <= CLS_MAX_TRIES); - cr0 = classifier_lookup(cls, CLS_MAX_VERSION, &flow, &wc); + cr0 = classifier_lookup(cls, version, &flow, &wc); cr1 = tcls_lookup(tcls, &flow); assert((cr0 == NULL) == (cr1 == NULL)); if (cr0 != NULL) { @@ -442,8 +447,12 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls) assert(cls_rule_equal(cr0, cr1)); assert(tr0->aux == tr1->aux); + + /* Make sure the rule should have been visible. 
*/ + assert(cr0->cls_match); + assert(cls_match_visible_in_version(cr0->cls_match, version)); } - cr2 = classifier_lookup(cls, CLS_MAX_VERSION, &flow, NULL); + cr2 = classifier_lookup(cls, version, &flow, NULL); assert(cr2 == cr0); } } @@ -511,14 +520,17 @@ verify_tries(struct classifier *cls) static void check_tables(const struct classifier *cls, int n_tables, int n_rules, - int n_dups) + int n_dups, int n_invisible, long long version) OVS_NO_THREAD_SAFETY_ANALYSIS { const struct cls_subtable *table; struct test_rule *test_rule; int found_tables = 0; + int found_tables_with_visible_rules = 0; int found_rules = 0; int found_dups = 0; + int found_invisible = 0; + int found_visible_but_removable = 0; int found_rules2 = 0; pvector_verify(&cls->subtables); @@ -527,6 +539,7 @@ check_tables(const struct classifier *cls, int n_tables, int n_rules, int max_priority = INT_MIN; unsigned int max_count = 0; bool found = false; + bool found_visible_rules = false; const struct cls_subtable *iter; /* Locate the subtable from 'subtables'. */ @@ -548,47 +561,105 @@ check_tables(const struct classifier *cls, int n_tables, int n_rules, == (table->ports_mask_len ? cmap_count(&table->rules) : 0)); found_tables++; + CMAP_FOR_EACH (head, cmap_node, &table->rules) { int prev_priority = INT_MAX; - const struct cls_match *rule; + long long prev_version = 0; + const struct cls_match *rule, *prev; + bool found_visible_rules_in_list = false; + + assert(head->priority <= table->max_priority); if (head->priority > max_priority) { max_priority = head->priority; - max_count = 1; - } else if (head->priority == max_priority) { - ++max_count; + max_count = 0; } - found_rules++; - CLS_MATCH_FOR_EACH_AFTER_HEAD (rule, head) { - assert(rule->priority < prev_priority); - assert(rule->priority <= table->max_priority); + FOR_EACH_RULE_IN_LIST_PROTECTED(rule, prev, head) { + long long rule_version; + const struct cls_rule *found_rule; + + /* Priority may not increase. 
*/ + assert(rule->priority <= prev_priority); + + if (rule->priority == max_priority) { + ++max_count; + } + + /* Count invisible rules and visible duplicates. */ + if (!cls_match_visible_in_version(rule, version)) { + found_invisible++; + } else { + if (cls_match_is_eventually_invisible(rule)) { + found_visible_but_removable++; + } + if (found_visible_rules_in_list) { + found_dups++; + } + found_visible_rules_in_list = true; + found_visible_rules = true; + } + + /* Rule must be visible in the version it was inserted. */ + rule_version = rule->cls_rule->version; + assert(cls_match_visible_in_version(rule, rule_version)); + + /* We should always find the latest version of the rule, + * unless all rules have been marked for removal. + * Later versions must always be later in the list. */ + found_rule = classifier_find_rule_exactly(cls, rule->cls_rule); + if (found_rule && found_rule != rule->cls_rule) { + + assert(found_rule->priority == rule->priority); + + /* Found rule may not have a lower version. */ + assert(found_rule->version >= rule_version); + + /* This rule must not be visible in the found rule's + * version. */ + assert(!cls_match_visible_in_version(rule, + found_rule->version)); + } + + if (rule->priority == prev_priority) { + /* Exact duplicate rule may not have a lower version. */ + assert(rule_version >= prev_version); + + /* Previous rule must not be visible in rule's version. 
*/ + assert(!cls_match_visible_in_version(prev, rule_version)); + } prev_priority = rule->priority; + prev_version = rule_version; found_rules++; - found_dups++; - assert(classifier_find_rule_exactly(cls, rule->cls_rule) - == rule->cls_rule); } } + + if (found_visible_rules) { + found_tables_with_visible_rules++; + } + assert(table->max_priority == max_priority); assert(table->max_count == max_count); } assert(found_tables == cmap_count(&cls->subtables_map)); assert(found_tables == pvector_count(&cls->subtables)); - assert(n_tables == -1 || n_tables == cmap_count(&cls->subtables_map)); - assert(n_rules == -1 || found_rules == n_rules); + assert(n_tables == -1 || n_tables == found_tables_with_visible_rules); + assert(n_rules == -1 || found_rules == n_rules + found_invisible); assert(n_dups == -1 || found_dups == n_dups); + assert(found_invisible == n_invisible); CLS_FOR_EACH (test_rule, cls_rule, cls) { found_rules2++; } - assert(found_rules == found_rules2); + /* Iteration does not see removable rules. */ + assert(found_rules + == found_rules2 + found_visible_but_removable + found_invisible); } static struct test_rule * -make_rule(int wc_fields, int priority, int value_pat) +make_rule(int wc_fields, int priority, int value_pat, long long version) { const struct cls_field *f; struct test_rule *rule; @@ -634,8 +705,9 @@ make_rule(int wc_fields, int priority, int value_pat) rule = xzalloc(sizeof *rule); cls_rule_init(&rule->cls_rule, &match, wc_fields - ? (priority == INT_MIN ? priority + 1 : priority) - : INT_MAX, CLS_MIN_VERSION); + ? (priority == INT_MIN ? priority + 1 : + priority == INT_MAX ? 
priority - 1 : priority) + : 0, version); return rule; } @@ -705,7 +777,7 @@ test_empty(struct ovs_cmdl_context *ctx OVS_UNUSED) tcls_init(&tcls); assert(classifier_is_empty(&cls)); assert(tcls_is_empty(&tcls)); - compare_classifiers(&cls, &tcls); + compare_classifiers(&cls, 0, CLS_MIN_VERSION, &tcls); classifier_destroy(&cls); tcls_destroy(&tcls); } @@ -729,22 +801,22 @@ test_single_rule(struct ovs_cmdl_context *ctx OVS_UNUSED) struct tcls tcls; rule = make_rule(wc_fields, - hash_bytes(&wc_fields, sizeof wc_fields, 0), 0); - + hash_bytes(&wc_fields, sizeof wc_fields, 0), 0, + CLS_MIN_VERSION); classifier_init(&cls, flow_segment_u64s); set_prefix_fields(&cls); tcls_init(&tcls); - tcls_rule = tcls_insert(&tcls, rule); + classifier_insert(&cls, &rule->cls_rule, NULL, 0); - compare_classifiers(&cls, &tcls); - check_tables(&cls, 1, 1, 0); + compare_classifiers(&cls, 0, CLS_MIN_VERSION, &tcls); + check_tables(&cls, 1, 1, 0, 0, CLS_MIN_VERSION); classifier_remove(&cls, &rule->cls_rule); tcls_remove(&tcls, tcls_rule); assert(classifier_is_empty(&cls)); assert(tcls_is_empty(&tcls)); - compare_classifiers(&cls, &tcls); + compare_classifiers(&cls, 0, CLS_MIN_VERSION, &tcls); ovsrcu_postpone(free_rule, rule); classifier_destroy(&cls); @@ -764,8 +836,10 @@ test_rule_replacement(struct ovs_cmdl_context *ctx OVS_UNUSED) struct test_rule *rule2; struct tcls tcls; - rule1 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX); - rule2 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX); + rule1 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX, + CLS_MIN_VERSION); + rule2 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX, + CLS_MIN_VERSION); rule2->aux += 5; rule2->aux += 5; @@ -774,8 +848,8 @@ test_rule_replacement(struct ovs_cmdl_context *ctx OVS_UNUSED) tcls_init(&tcls); tcls_insert(&tcls, rule1); classifier_insert(&cls, &rule1->cls_rule, NULL, 0); - compare_classifiers(&cls, &tcls); - check_tables(&cls, 1, 1, 0); + compare_classifiers(&cls, 0, CLS_MIN_VERSION, 
&tcls); + check_tables(&cls, 1, 1, 0, 0, CLS_MIN_VERSION); tcls_destroy(&tcls); tcls_init(&tcls); @@ -785,8 +859,8 @@ test_rule_replacement(struct ovs_cmdl_context *ctx OVS_UNUSED) classifier_replace(&cls, &rule2->cls_rule, NULL, 0)) == rule1); ovsrcu_postpone(free_rule, rule1); - compare_classifiers(&cls, &tcls); - check_tables(&cls, 1, 1, 0); + compare_classifiers(&cls, 0, CLS_MIN_VERSION, &tcls); + check_tables(&cls, 1, 1, 0, 0, CLS_MIN_VERSION); classifier_defer(&cls); classifier_remove(&cls, &rule2->cls_rule); @@ -876,11 +950,13 @@ test_many_rules_in_one_list (struct ovs_cmdl_context *ctx OVS_UNUSED) int pri_rules[N_RULES]; struct classifier cls; struct tcls tcls; + long long version = CLS_MIN_VERSION; + size_t n_invisible_rules = 0; n_permutations++; for (i = 0; i < N_RULES; i++) { - rules[i] = make_rule(456, pris[i], 0); + rules[i] = make_rule(456, pris[i], 0, version); tcls_rules[i] = NULL; pri_rules[i] = -1; } @@ -890,16 +966,36 @@ test_many_rules_in_one_list (struct ovs_cmdl_context *ctx OVS_UNUSED) tcls_init(&tcls); for (i = 0; i < ARRAY_SIZE(ops); i++) { + struct test_rule *displaced_rule = NULL; + struct cls_rule *removable_rule = NULL; int j = ops[i]; int m, n; if (!tcls_rules[j]) { - struct test_rule *displaced_rule; - tcls_rules[j] = tcls_insert(&tcls, rules[j]); - displaced_rule = test_rule_from_cls_rule( - classifier_replace(&cls, &rules[j]->cls_rule, - NULL, 0)); + if (versioned) { + /* Insert the new rule in the next version. */ + *CONST_CAST(long long *, &rules[j]->cls_rule.version) + = ++version; + + displaced_rule = test_rule_from_cls_rule( + classifier_find_rule_exactly(&cls, + &rules[j]->cls_rule)); + if (displaced_rule) { + /* Mark the old rule for removal after the current + * version. 
*/ + cls_rule_make_invisible_in_version( + &displaced_rule->cls_rule, version, + version - 1); + n_invisible_rules++; + removable_rule = &displaced_rule->cls_rule; + } + classifier_insert(&cls, &rules[j]->cls_rule, NULL, 0); + } else { + displaced_rule = test_rule_from_cls_rule( + classifier_replace(&cls, &rules[j]->cls_rule, + NULL, 0)); + } if (pri_rules[pris[j]] >= 0) { int k = pri_rules[pris[j]]; assert(displaced_rule != NULL); @@ -911,18 +1007,37 @@ test_many_rules_in_one_list (struct ovs_cmdl_context *ctx OVS_UNUSED) } pri_rules[pris[j]] = j; } else { - classifier_remove(&cls, &rules[j]->cls_rule); + if (versioned) { + /* Mark the rule for removal after the current + * version. */ + cls_rule_make_invisible_in_version( + &rules[j]->cls_rule, version + 1, version); + ++version; + n_invisible_rules++; + removable_rule = &rules[j]->cls_rule; + } else { + classifier_remove(&cls, &rules[j]->cls_rule); + } tcls_remove(&tcls, tcls_rules[j]); tcls_rules[j] = NULL; pri_rules[pris[j]] = -1; } - compare_classifiers(&cls, &tcls); - + compare_classifiers(&cls, n_invisible_rules, version, &tcls); n = 0; for (m = 0; m < N_RULES; m++) { n += tcls_rules[m] != NULL; } - check_tables(&cls, n > 0, n, n - 1); + check_tables(&cls, n > 0, n, n - 1, n_invisible_rules, + version); + + if (versioned && removable_rule) { + /* Removable rule is no longer visible. 
*/ + assert(removable_rule->cls_match); + assert(!cls_match_visible_in_version( + removable_rule->cls_match, version)); + classifier_remove(&cls, removable_rule); + n_invisible_rules--; + } } classifier_defer(&cls); @@ -978,6 +1093,8 @@ test_many_rules_in_one_table(struct ovs_cmdl_context *ctx OVS_UNUSED) struct test_rule *tcls_rules[N_RULES]; struct classifier cls; struct tcls tcls; + long long version = CLS_MIN_VERSION; + size_t n_invisible_rules = 0; int value_pats[N_RULES]; int value_mask; int wcf; @@ -999,22 +1116,44 @@ test_many_rules_in_one_table(struct ovs_cmdl_context *ctx OVS_UNUSED) value_pats[i] = random_uint32() & value_mask; } while (array_contains(value_pats, i, value_pats[i])); - rules[i] = make_rule(wcf, priority, value_pats[i]); + ++version; + rules[i] = make_rule(wcf, priority, value_pats[i], version); tcls_rules[i] = tcls_insert(&tcls, rules[i]); classifier_insert(&cls, &rules[i]->cls_rule, NULL, 0); - compare_classifiers(&cls, &tcls); + compare_classifiers(&cls, n_invisible_rules, version, &tcls); - check_tables(&cls, 1, i + 1, 0); + check_tables(&cls, 1, i + 1, 0, n_invisible_rules, version); } for (i = 0; i < N_RULES; i++) { tcls_remove(&tcls, tcls_rules[i]); - classifier_remove(&cls, &rules[i]->cls_rule); - compare_classifiers(&cls, &tcls); - ovsrcu_postpone(free_rule, rules[i]); + if (versioned) { + /* Mark the rule for removal after the current version. 
*/ + cls_rule_make_invisible_in_version(&rules[i]->cls_rule, + version + 1, version); + ++version; + n_invisible_rules++; + } else { + classifier_remove(&cls, &rules[i]->cls_rule); + } + compare_classifiers(&cls, n_invisible_rules, version, &tcls); + check_tables(&cls, i < N_RULES - 1, N_RULES - (i + 1), 0, + n_invisible_rules, version); + if (!versioned) { + ovsrcu_postpone(free_rule, rules[i]); + } + } - check_tables(&cls, i < N_RULES - 1, N_RULES - (i + 1), 0); + if (versioned) { + for (i = 0; i < N_RULES; i++) { + classifier_remove(&cls, &rules[i]->cls_rule); + n_invisible_rules--; + + compare_classifiers(&cls, n_invisible_rules, version, &tcls); + check_tables(&cls, 0, 0, 0, n_invisible_rules, version); + ovsrcu_postpone(free_rule, rules[i]); + } } classifier_destroy(&cls); @@ -1043,6 +1182,9 @@ test_many_rules_in_n_tables(int n_tables) int priorities[MAX_RULES]; struct classifier cls; struct tcls tcls; + long long version = CLS_MIN_VERSION; + size_t n_invisible_rules = 0; + struct ovs_list list = OVS_LIST_INITIALIZER(&list); random_set_seed(iteration + 1); for (i = 0; i < MAX_RULES; i++) { @@ -1059,29 +1201,57 @@ test_many_rules_in_n_tables(int n_tables) int priority = priorities[i]; int wcf = wcfs[random_range(n_tables)]; int value_pat = random_uint32() & ((1u << CLS_N_FIELDS) - 1); - rule = make_rule(wcf, priority, value_pat); + rule = make_rule(wcf, priority, value_pat, version); tcls_insert(&tcls, rule); classifier_insert(&cls, &rule->cls_rule, NULL, 0); - compare_classifiers(&cls, &tcls); - check_tables(&cls, -1, i + 1, -1); + compare_classifiers(&cls, n_invisible_rules, version, &tcls); + check_tables(&cls, -1, i + 1, -1, n_invisible_rules, version); } - while (!classifier_is_empty(&cls)) { + while (classifier_count(&cls) - n_invisible_rules > 0) { struct test_rule *target; struct test_rule *rule; + size_t n_removable_rules = 0; target = clone_rule(tcls.rules[random_range(tcls.n_rules)]); CLS_FOR_EACH_TARGET (rule, cls_rule, &cls, &target->cls_rule) { - 
if (classifier_remove(&cls, &rule->cls_rule)) { + if (versioned) { + /* Mark the rule for removal after the current version. */ + cls_rule_make_invisible_in_version(&rule->cls_rule, + version + 1, version); + n_removable_rules++; + compare_classifiers(&cls, n_invisible_rules, version, + &tcls); + check_tables(&cls, -1, -1, -1, n_invisible_rules, version); + + list_push_back(&list, &rule->list_node); + } else if (classifier_remove(&cls, &rule->cls_rule)) { ovsrcu_postpone(free_rule, rule); } } + ++version; + n_invisible_rules += n_removable_rules; + tcls_delete_matches(&tcls, &target->cls_rule); - compare_classifiers(&cls, &tcls); - check_tables(&cls, -1, -1, -1); free_rule(target); + + compare_classifiers(&cls, n_invisible_rules, version, &tcls); + check_tables(&cls, -1, -1, -1, n_invisible_rules, version); + } + if (versioned) { + struct test_rule *rule; + + /* Remove rules that are no longer visible. */ + LIST_FOR_EACH_POP (rule, list_node, &list) { + classifier_remove(&cls, &rule->cls_rule); + n_invisible_rules--; + + compare_classifiers(&cls, n_invisible_rules, version, + &tcls); + check_tables(&cls, -1, -1, -1, n_invisible_rules, version); + } } destroy_classifier(&cls); @@ -1406,8 +1576,8 @@ static const struct ovs_cmdl_command commands[] = { {"destroy-null", NULL, 0, 0, test_destroy_null}, {"single-rule", NULL, 0, 0, test_single_rule}, {"rule-replacement", NULL, 0, 0, test_rule_replacement}, - {"many-rules-in-one-list", NULL, 0, 0, test_many_rules_in_one_list}, - {"many-rules-in-one-table", NULL, 0, 0, test_many_rules_in_one_table}, + {"many-rules-in-one-list", NULL, 0, 1, test_many_rules_in_one_list}, + {"many-rules-in-one-table", NULL, 0, 1, test_many_rules_in_one_table}, {"many-rules-in-two-tables", NULL, 0, 0, test_many_rules_in_two_tables}, {"many-rules-in-five-tables", NULL, 0, 0, test_many_rules_in_five_tables}, @@ -1427,6 +1597,13 @@ test_classifier_main(int argc, char *argv[]) .argv = argv + 1, }; set_program_name(argv[0]); + + if (argc > 1 && 
!strcmp(argv[1], "--versioned")) { + versioned = true; + ctx.argc--; + ctx.argv++; + } + init_values(); ovs_cmdl_run_command(&ctx, commands); } From 621b8064b7f8921576dcba1c4b292ba1f6644061 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:43 -0700 Subject: [PATCH 140/146] ofproto: Infra for table versioning. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto-dpif-xlate.c | 17 +++++++++++++- ofproto/ofproto-dpif.c | 43 ++++++++++++++++++++++++++++-------- ofproto/ofproto-dpif.h | 5 ++++- ofproto/ofproto-provider.h | 6 +++++ ofproto/ofproto.c | 5 +++++ 5 files changed, 65 insertions(+), 11 deletions(-) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 59cd088db..337d6f8c2 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -159,6 +159,9 @@ struct xlate_ctx { const struct xbridge *xbridge; + /* Flow tables version at the beginning of the translation. */ + long long tables_version; + /* Flow at the last commit. */ struct flow base_flow; @@ -2774,6 +2777,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; bool old_was_mpls = ctx->was_mpls; + long long old_version = ctx->tables_version; enum slow_path_reason special; struct ofpbuf old_stack = ctx->stack; union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; @@ -2789,6 +2793,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, memset(flow->regs, 0, sizeof flow->regs); flow->actset_output = OFPP_UNSET; + /* The bridge is now known so obtain its table version. 
*/ + ctx->tables_version + = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto); + special = process_special(ctx, &ctx->xin->flow, peer, ctx->xin->packet); if (special) { @@ -2835,6 +2843,9 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, ofpbuf_uninit(&ctx->stack); ctx->stack = old_stack; + /* Restore calling bridge's lookup version. */ + ctx->tables_version = old_version; + /* The peer bridge popping MPLS should have no effect on the original * bridge. */ ctx->was_mpls = old_was_mpls; @@ -3056,6 +3067,7 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, wc = (ctx->xin->skip_wildcards) ? NULL : &ctx->xout->wc; rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto, + ctx->tables_version, &ctx->xin->flow, wc, ctx->xin->xcache != NULL, ctx->xin->resubmit_stats, @@ -4826,9 +4838,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) flow->recirc_id); return; } + /* The bridge is now known so obtain its table version. */ + ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto); if (!xin->ofpacts && !ctx.rule) { - rule = rule_dpif_lookup_from_table(ctx.xbridge->ofproto, flow, wc, + rule = rule_dpif_lookup_from_table(ctx.xbridge->ofproto, + ctx.tables_version, flow, wc, ctx.xin->xcache != NULL, ctx.xin->resubmit_stats, &ctx.table_id, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 81beca049..cc5d9d4d7 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -287,6 +287,8 @@ struct ofproto_dpif { struct ofproto up; struct dpif_backer *backer; + atomic_llong tables_version; /* Version # to use in classifier lookups. */ + uint64_t dump_seq; /* Last read of udpif_dump_seq(). */ /* Special OpenFlow rules. 
*/ @@ -1227,6 +1229,7 @@ construct(struct ofproto *ofproto_) return error; } + atomic_init(&ofproto->tables_version, CLS_MIN_VERSION); ofproto->netflow = NULL; ofproto->sflow = NULL; ofproto->ipfix = NULL; @@ -1605,6 +1608,15 @@ query_tables(struct ofproto *ofproto, } } +static void +set_tables_version(struct ofproto *ofproto_, long long version) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + atomic_store_relaxed(&ofproto->tables_version, version); +} + + static struct ofport * port_alloc(void) { @@ -3709,6 +3721,15 @@ rule_set_recirc_id(struct rule *rule_, uint32_t id) ovs_mutex_unlock(&rule->up.mutex); } +long long +ofproto_dpif_get_tables_version(struct ofproto_dpif *ofproto) +{ + long long version; + + atomic_read_relaxed(&ofproto->tables_version, &version); + return version; +} + /* The returned rule (if any) is valid at least until the next RCU quiescent * period. If the rule needs to stay around longer, a non-zero 'take_ref' * must be passed in to cause a reference to be taken on it. @@ -3716,16 +3737,16 @@ rule_set_recirc_id(struct rule *rule_, uint32_t id) * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. 
*/ static struct rule_dpif * -rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, - struct flow *flow, struct flow_wildcards *wc, - bool take_ref) +rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, long long version, + uint8_t table_id, struct flow *flow, + struct flow_wildcards *wc, bool take_ref) { struct classifier *cls = &ofproto->up.tables[table_id].cls; const struct cls_rule *cls_rule; struct rule_dpif *rule; do { - cls_rule = classifier_lookup(cls, CLS_MAX_VERSION, flow, wc); + cls_rule = classifier_lookup(cls, version, flow, wc); rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); @@ -3762,9 +3783,9 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. */ struct rule_dpif * -rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, - struct flow_wildcards *wc, bool take_ref, - const struct dpif_flow_stats *stats, +rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, long long version, + struct flow *flow, struct flow_wildcards *wc, + bool take_ref, const struct dpif_flow_stats *stats, uint8_t *table_id, ofp_port_t in_port, bool may_packet_in, bool honor_table_miss) { @@ -3815,7 +3836,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, next_id++, next_id += (next_id == TBL_INTERNAL)) { *table_id = next_id; - rule = rule_dpif_lookup_in_table(ofproto, next_id, flow, wc, take_ref); + rule = rule_dpif_lookup_in_table(ofproto, version, next_id, flow, wc, + take_ref); if (stats) { struct oftable *tbl = &ofproto->up.tables[next_id]; unsigned long orig; @@ -5444,7 +5466,9 @@ ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, return error; } - rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow, + rule = rule_dpif_lookup_in_table(ofproto, + ofproto_dpif_get_tables_version(ofproto), + TBL_INTERNAL, &fm.match.flow, 
&fm.match.wc, false); if (rule) { *rulep = &rule->up; @@ -5499,6 +5523,7 @@ const struct ofproto_class ofproto_dpif_class = { type_get_memory_usage, flush, query_tables, + set_tables_version, port_alloc, port_construct, port_destruct, diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 9d625a162..aaaf67191 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. +/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,7 +102,10 @@ size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *); bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *); bool ofproto_dpif_get_enable_ufid(struct dpif_backer *backer); +long long ofproto_dpif_get_tables_version(struct ofproto_dpif *); + struct rule_dpif *rule_dpif_lookup_from_table(struct ofproto_dpif *, + long long version, struct flow *, struct flow_wildcards *, bool take_ref, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 2e4d33380..46ffe7f4d 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -93,6 +93,8 @@ struct ofproto { long long int eviction_group_timer; /* For rate limited reheapification. */ struct oftable *tables; int n_tables; + long long tables_version; /* Controls which rules are visible to + * table lookups. */ /* Rules indexed on their cookie values, in all flow tables. */ struct hindex cookies OVS_GUARDED_BY(ofproto_mutex); @@ -834,6 +836,10 @@ struct ofproto_class { struct ofputil_table_features *features, struct ofputil_table_stats *stats); + /* Sets the current tables version the provider should use for classifier + * lookups. 
*/ + void (*set_tables_version)(struct ofproto *ofproto, + long long version); /* ## ---------------- ## */ /* ## ofport Functions ## */ /* ## ---------------- ## */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index b5424b9f4..716fbfa30 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -527,6 +527,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ofproto->eviction_group_timer = LLONG_MIN; ofproto->tables = NULL; ofproto->n_tables = 0; + ofproto->tables_version = CLS_MIN_VERSION; hindex_init(&ofproto->cookies); hmap_init(&ofproto->learned_cookies); list_init(&ofproto->expirable); @@ -577,6 +578,10 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ofproto->meters = xzalloc((ofproto->meter_features.max_meters + 1) * sizeof(struct meter *)); + /* Set the initial tables version. */ + ofproto->ofproto_class->set_tables_version(ofproto, + ofproto->tables_version); + *ofprotop = ofproto; return 0; } From 39c9459355b6f010aa73ca80ad8d0e6893ef0a88 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:43 -0700 Subject: [PATCH 141/146] Use classifier versioning. Each rule is now added or deleted in a specific tables version. Flow tables are versioned with a monotonically increasing 64-bit integer, where positive values are valid version numbers. Rule modifications are implemented as an insertion of a new rule and a deletion of the old rule, both taking place in the same tables version. Since concurrent lookups may use different versions, both the old and new rule must be available for lookups at the same time. The ofproto provider interface is changed to accommodate the above. As a rule's actions need not be modified any more, we no longer need 'rule_premodify_actions', nor 'rule_modify_actions'. 
'rule_insert' now takes a pointer to the old rule and adds a flag that tells whether the old stats should be forwarded to the new rule or not (this replaces the 'reset_counters' flag of the now removed 'rule_modify_actions'). Versioning all flow table changes has the side effect of making learned flows visible for future lookups only. I.e., the upcall that executes the learn action will not see the newly learned action in its classifier lookups. Only upcalls that start executing after the new flow was added will match on it. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- NEWS | 21 +- lib/classifier.c | 14 +- lib/classifier.h | 2 + lib/ofp-actions.c | 19 +- ofproto/bundles.h | 5 +- ofproto/ofproto-dpif.c | 91 ++-- ofproto/ofproto-provider.h | 59 +-- ofproto/ofproto.c | 967 ++++++++++++++++++++----------------- tests/ofproto.at | 42 +- tests/ovs-ofctl.at | 40 +- utilities/ovs-ofctl.8.in | 19 +- utilities/ovs-ofctl.c | 4 +- 12 files changed, 701 insertions(+), 582 deletions(-) diff --git a/NEWS b/NEWS index 5bea23798..a3eeed52b 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Post-v2.3.0 --------------------- + - Flow table modifications are now atomic, meaning that each packet + now sees a coherent version of the OpenFlow pipeline. For + example, if a controller removes all flows with a single OpenFlow + "flow_mod", no packet sees an intermediate version of the OpenFlow + pipeline where only some of the flows have been deleted. - Added support for SFQ, FQ_CoDel and CoDel qdiscs. - Add bash command-line completion support for ovs-vsctl Please check utilities/ovs-command-compgen.INSTALL.md for how to use. @@ -28,10 +33,10 @@ Post-v2.3.0 release. See ovs-vswitchd(8) for details. - OpenFlow: * OpenFlow 1.4 bundles are now supported, but for flow mod - messages only. 
'atomic' bundles are not yet supported, and - 'ordered' bundles are trivially supported, as all bundled - messages are executed in the order they were added to the - bundle regardless of the presence of the 'ordered' flag. + messages only. Both 'atomic' and 'ordered' bundle flags are + trivially supported, as all bundled messages are executed in + the order they were added and all flow table modifications are + now atomic to the datapath. * IPv6 flow label and neighbor discovery fields are now modifiable. * OpenFlow 1.5 extended registers are now supported. * The OpenFlow 1.5 actset_output field is now supported. @@ -49,15 +54,15 @@ Post-v2.3.0 - ovs-ofctl has a new '--bundle' option that makes the flow mod commands ('add-flow', 'add-flows', 'mod-flows', 'del-flows', and 'replace-flows') use an OpenFlow 1.4 bundle to operate the modifications as a single - transaction. If any of the flow mods in a transaction fail, none of - them are executed. + atomic transaction. If any of the flow mods in a transaction fail, none + of them are executed. All flow mods in a bundle appear to datapath + lookups simultaneously. - ovs-ofctl 'add-flow' and 'add-flows' commands now accept arbitrary flow mods as an input by allowing the flow specification to start with an explicit 'add', 'modify', 'modify_strict', 'delete', or 'delete_strict' keyword. A missing keyword is treated as 'add', so this is fully backwards compatible. With the new '--bundle' option all the flow mods - are executed as a single transaction using the new OpenFlow 1.4 bundles - support. + are executed as a single atomic transaction using an OpenFlow 1.4 bundle. - ovs-pki: Changed message digest algorithm from MD5 to SHA-1 because MD5 is no longer secure and some operating systems have started to disable it in OpenSSL. 
diff --git a/lib/classifier.c b/lib/classifier.c index 50bbbbf3f..5f92f0514 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -202,14 +202,24 @@ cls_rule_init_from_minimatch(struct cls_rule *rule, minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match); } +/* Initializes 'dst' as a copy of 'src', but with 'version'. + * + * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ +void +cls_rule_clone_in_version(struct cls_rule *dst, const struct cls_rule *src, + long long version) +{ + cls_rule_init__(dst, src->priority, version); + minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match); +} + /* Initializes 'dst' as a copy of 'src'. * * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ void cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) { - cls_rule_init__(dst, src->priority, src->version); - minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match); + cls_rule_clone_in_version(dst, src, src->version); } /* Initializes 'dst' with the data in 'src', destroying 'src'. diff --git a/lib/classifier.h b/lib/classifier.h index cb0030abd..ef5744631 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -367,6 +367,8 @@ void cls_rule_init(struct cls_rule *, const struct match *, int priority, void cls_rule_init_from_minimatch(struct cls_rule *, const struct minimatch *, int priority, long long version); void cls_rule_clone(struct cls_rule *, const struct cls_rule *); +void cls_rule_clone_in_version(struct cls_rule *, const struct cls_rule *, + long long version); void cls_rule_move(struct cls_rule *dst, struct cls_rule *src); void cls_rule_destroy(struct cls_rule *); diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index e18229da6..10e2a92c0 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -3660,7 +3660,24 @@ format_RESUBMIT(const struct ofpact_resubmit *a, struct ds *s) * address. 
This is not usually the intent in MAC learning; instead, we want * the MAC learn entry to expire when no traffic has been sent *from* the * learned address. Use a hard timeout for that. - */ + * + * + * Visibility of Changes + * --------------------- + * + * Prior to Open vSwitch 2.4, any changes made by a "learn" action in a given + * flow translation are visible to flow table lookups made later in the flow + * translation. This means that, in the example above, a MAC learned by the + * learn action in table 0 would be found in table 1 (if the packet being + * processed had the same source and destination MAC address). + * + * In Open vSwitch 2.4 and later, changes to a flow table (whether to add or + * modify a flow) by a "learn" action are visible only for later flow + * translations, not for later lookups within the same flow translation. In + * the MAC learning example, a MAC learned by the learn action in table 0 would + * not be found in table 1 if the flow translation would resubmit to table 1 + * after the processing of the learn action, meaning that if this MAC had not + * been learned before then the packet would be flooded. */ struct nx_action_learn { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* At least 24. */ diff --git a/ofproto/bundles.h b/ofproto/bundles.h index 778cba25a..0c7daf288 100644 --- a/ofproto/bundles.h +++ b/ofproto/bundles.h @@ -40,9 +40,8 @@ struct ofp_bundle_entry { }; /* Used during commit. */ - struct rule_collection rules; /* Affected rules. */ - struct rule *rule; - bool modify; + struct rule_collection old_rules; /* Affected rules. */ + struct rule_collection new_rules; /* Affected rules. */ /* OpenFlow header and some of the message contents for error reporting. 
*/ struct ofp_header ofp_msg[DIV_ROUND_UP(64, sizeof(struct ofp_header))]; diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index cc5d9d4d7..55fea0f05 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -89,6 +89,15 @@ struct rule_dpif { struct ovs_mutex stats_mutex; struct dpif_flow_stats stats OVS_GUARDED; + /* If non-NULL, will point to a new rule (for which a reference is held) to + * which all the stats updates should be forwarded. This exists only + * transitionally when flows are replaced. + * + * Protected by stats_mutex. If both 'rule->stats_mutex' and + * 'rule->new_rule->stats_mutex' must be held together, acquire them in that + * order. */ + struct rule_dpif *new_rule OVS_GUARDED; + /* If non-zero then the recirculation id that has * been allocated for use with this rule. * The recirculation id and associated internal flow should @@ -3668,9 +3677,13 @@ rule_dpif_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats) { ovs_mutex_lock(&rule->stats_mutex); - rule->stats.n_packets += stats->n_packets; - rule->stats.n_bytes += stats->n_bytes; - rule->stats.used = MAX(rule->stats.used, stats->used); + if (OVS_UNLIKELY(rule->new_rule)) { + rule_dpif_credit_stats(rule->new_rule, stats); + } else { + rule->stats.n_packets += stats->n_packets; + rule->stats.n_bytes += stats->n_bytes; + rule->stats.used = MAX(rule->stats.used, stats->used); + } ovs_mutex_unlock(&rule->stats_mutex); } @@ -3722,11 +3735,12 @@ rule_set_recirc_id(struct rule *rule_, uint32_t id) } long long -ofproto_dpif_get_tables_version(struct ofproto_dpif *ofproto) +ofproto_dpif_get_tables_version(struct ofproto_dpif *ofproto OVS_UNUSED) { long long version; atomic_read_relaxed(&ofproto->tables_version, &version); + return version; } @@ -3756,12 +3770,12 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, long long version, return rule; } -/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
- * Returns the rule that was found, which may be one of the special rules - * according to packet miss hadling. If 'may_packet_in' is false, returning of - * the miss_rule (which issues packet ins for the controller) is avoided. - * Updates 'wc', if nonnull, to reflect the fields that were used during the - * lookup. +/* Look up 'flow' in 'ofproto''s classifier version 'version', starting from + * table '*table_id'. Returns the rule that was found, which may be one of the + * special rules according to packet miss handling. If 'may_packet_in' is + * false, returning of the miss_rule (which issues packet ins for the + * controller) is avoided. Updates 'wc', if nonnull, to reflect the fields + * that were used during the lookup. * * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but * if none is found then the table miss configuration for that table is @@ -3927,17 +3941,35 @@ rule_construct(struct rule *rule_) rule->stats.n_bytes = 0; rule->stats.used = rule->up.modified; rule->recirc_id = 0; + rule->new_rule = NULL; return 0; } -static enum ofperr -rule_insert(struct rule *rule_) +static void +rule_insert(struct rule *rule_, struct rule *old_rule_, bool forward_stats) OVS_REQUIRES(ofproto_mutex) { struct rule_dpif *rule = rule_dpif_cast(rule_); + + if (old_rule_ && forward_stats) { + struct rule_dpif *old_rule = rule_dpif_cast(old_rule_); + + ovs_assert(!old_rule->new_rule); + + /* Take a reference to the new rule, and refer all stats updates from + * the old rule to the new rule. */ + rule_dpif_ref(rule); + + ovs_mutex_lock(&old_rule->stats_mutex); + ovs_mutex_lock(&rule->stats_mutex); + old_rule->new_rule = rule; /* Forward future stats. */ + rule->stats = old_rule->stats; /* Transfer stats to the new rule.
*/ + ovs_mutex_unlock(&rule->stats_mutex); + ovs_mutex_unlock(&old_rule->stats_mutex); + } + complete_operation(rule); - return 0; } static void @@ -3950,10 +3982,15 @@ rule_delete(struct rule *rule_) static void rule_destruct(struct rule *rule_) + OVS_NO_THREAD_SAFETY_ANALYSIS { struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_destroy(&rule->stats_mutex); + /* Release reference to the new rule, if any. */ + if (rule->new_rule) { + rule_dpif_unref(rule->new_rule); + } if (rule->recirc_id) { recirc_free_id(rule->recirc_id); } @@ -3966,9 +4003,13 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes, struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_lock(&rule->stats_mutex); - *packets = rule->stats.n_packets; - *bytes = rule->stats.n_bytes; - *used = rule->stats.used; + if (OVS_UNLIKELY(rule->new_rule)) { + rule_get_stats(&rule->new_rule->up, packets, bytes, used); + } else { + *packets = rule->stats.n_packets; + *bytes = rule->stats.n_bytes; + *used = rule->stats.used; + } ovs_mutex_unlock(&rule->stats_mutex); } @@ -3990,22 +4031,6 @@ rule_execute(struct rule *rule, const struct flow *flow, return 0; } -static void -rule_modify_actions(struct rule *rule_, bool reset_counters) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_dpif *rule = rule_dpif_cast(rule_); - - if (reset_counters) { - ovs_mutex_lock(&rule->stats_mutex); - rule->stats.n_packets = 0; - rule->stats.n_bytes = 0; - ovs_mutex_unlock(&rule->stats_mutex); - } - - complete_operation(rule); -} - static struct group_dpif *group_dpif_cast(const struct ofgroup *group) { return group ? 
CONTAINER_OF(group, struct group_dpif, up) : NULL; @@ -5550,8 +5575,6 @@ const struct ofproto_class ofproto_dpif_class = { rule_dealloc, rule_get_stats, rule_execute, - NULL, /* rule_premodify_actions */ - rule_modify_actions, set_frag_handling, packet_out, set_netflow, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 46ffe7f4d..07229c598 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -323,6 +323,8 @@ struct rule { struct ofproto *const ofproto; /* The ofproto that contains this rule. */ const struct cls_rule cr; /* In owning ofproto's classifier. */ const uint8_t table_id; /* Index in ofproto's 'tables' array. */ + bool removed; /* Rule has been removed from the ofproto + * data structures. */ /* Protects members marked OVS_GUARDED. * Readers only need to hold this mutex. @@ -361,7 +363,7 @@ struct rule { /* OpenFlow actions. See struct rule_actions for more thread-safety * notes. */ - OVSRCU_TYPE(const struct rule_actions *) actions; + const struct rule_actions * const actions; /* In owning meter's 'rules' list. An empty list if there is no meter. */ struct ovs_list meter_list_node OVS_GUARDED_BY(ofproto_mutex); @@ -405,7 +407,7 @@ static inline bool rule_is_hidden(const struct rule *); * code that holds 'rule->mutex' (where 'rule' is the rule for which * 'rule->actions == actions') or during the RCU active period. * - * All members are immutable: they do not change during the struct's + * All members are immutable: they do not change during the rule's * lifetime. */ struct rule_actions { /* Flags. 
@@ -1124,7 +1126,7 @@ struct ofproto_class { * OpenFlow error code), the ofproto base code will uninitialize and * deallocate 'rule'. See "Rule Life Cycle" above for more details. * - * ->rule_construct() may also: + * ->rule_construct() must also: * * - Validate that the datapath supports the matching rule in 'rule->cr' * datapath. For example, if the rule's table does not support @@ -1133,8 +1135,9 @@ struct ofproto_class { * * - Validate that the datapath can correctly implement 'rule->ofpacts'. * - * Some implementations might need to defer these tasks to ->rule_insert(), - * which is also acceptable. + * After a successful construction the rest of the rule life cycle calls + * may not fail, so ->rule_construct() must also make sure that the rule + * can be inserted into the datapath. * * * Insertion @@ -1143,11 +1146,10 @@ struct ofproto_class { * Following successful construction, the ofproto base case inserts 'rule' * into its flow table, then it calls ->rule_insert(). ->rule_insert() * must add the new rule to the datapath flow table and return only after - * this is complete (whether it succeeds or fails). - * - * If ->rule_insert() fails, the ofproto base code will remove 'rule' from - * the flow table, destruct, uninitialize, and deallocate 'rule'. See - * "Rule Life Cycle" above for more details. + * this is complete. The 'new_rule' may be a duplicate of an 'old_rule'. + * In this case the 'old_rule' is non-null, and the implementation should + * forward rule statistics from the 'old_rule' to the 'new_rule' if + * 'forward_stats' is 'true'. This may not fail.
* * * Deletion @@ -1169,7 +1171,8 @@ struct ofproto_class { struct rule *(*rule_alloc)(void); enum ofperr (*rule_construct)(struct rule *rule) /* OVS_REQUIRES(ofproto_mutex) */; - enum ofperr (*rule_insert)(struct rule *rule) + void (*rule_insert)(struct rule *rule, struct rule *old_rule, + bool forward_stats) /* OVS_REQUIRES(ofproto_mutex) */; void (*rule_delete)(struct rule *rule) /* OVS_REQUIRES(ofproto_mutex) */; void (*rule_destruct)(struct rule *rule); @@ -1202,36 +1205,6 @@ struct ofproto_class { enum ofperr (*rule_execute)(struct rule *rule, const struct flow *flow, struct dp_packet *packet); - /* If the datapath can properly implement changing 'rule''s actions to the - * 'ofpacts_len' bytes in 'ofpacts', returns 0. Otherwise, returns an enum - * ofperr indicating why the new actions wouldn't work. - * - * May be a null pointer if any set of actions is acceptable. */ - enum ofperr (*rule_premodify_actions)(const struct rule *rule, - const struct ofpact *ofpacts, - size_t ofpacts_len) - /* OVS_REQUIRES(ofproto_mutex) */; - - /* When ->rule_modify_actions() is called, the caller has already replaced - * the OpenFlow actions in 'rule' by a new set. (If - * ->rule_premodify_actions is nonnull, then it was previously called to - * verify that the new set of actions is acceptable.) - * - * ->rule_modify_actions() must: - * - * - Update the datapath flow table with the new actions. - * - * - Only if 'reset_counters' is true, reset any packet or byte counters - * associated with the rule to zero, so that rule_get_stats() will not - * longer count those packets or bytes. - * - * Rule modification must not fail. - * - * ->rule_modify_actions() should not modify any base members of struct - * rule. 
*/ - void (*rule_modify_actions)(struct rule *rule, bool reset_counters) - /* OVS_REQUIRES(ofproto_mutex) */; - /* Changes the OpenFlow IP fragment handling policy to 'frag_handling', * which takes one of the following values, with the corresponding * meanings: @@ -1785,7 +1758,7 @@ void ofproto_flush_flows(struct ofproto *); static inline const struct rule_actions * rule_get_actions(const struct rule *rule) { - return ovsrcu_get(const struct rule_actions *, &rule->actions); + return rule->actions; } /* Returns true if 'rule' is an OpenFlow 1.3 "table-miss" rule, false diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 716fbfa30..5242cf0e6 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -89,8 +89,6 @@ static void oftable_enable_eviction(struct oftable *, const struct mf_subfield *fields, size_t n_fields); -static void oftable_remove_rule(struct rule *rule) OVS_REQUIRES(ofproto_mutex); - /* A set of rules within a single OpenFlow table (oftable) that have the same * values for the oftable's eviction_fields. A rule to be evicted, when one is * needed, is taken from the eviction group that contains the greatest number @@ -254,15 +252,29 @@ struct flow_mod_requester { }; /* OpenFlow. 
*/ -static enum ofperr modify_flow_check__(struct ofproto *, +static enum ofperr replace_rule_create(struct ofproto *, struct ofputil_flow_mod *, - const struct rule *) + struct cls_rule *cr, uint8_t table_id, + struct rule *old_rule, + struct rule **new_rule) OVS_REQUIRES(ofproto_mutex); -static void modify_flow__(struct ofproto *, struct ofputil_flow_mod *, - const struct flow_mod_requester *, struct rule *, - struct ovs_list *dead_cookies) + +static void replace_rule_start(struct ofproto *, + struct rule *old_rule, + struct rule *new_rule, + struct cls_conjunction *, size_t n_conjs) OVS_REQUIRES(ofproto_mutex); -static void delete_flows__(const struct rule_collection *, + +static void replace_rule_revert(struct ofproto *, + struct rule *old_rule, struct rule *new_rule) + OVS_REQUIRES(ofproto_mutex); + +static void replace_rule_finish(struct ofproto *, struct ofputil_flow_mod *, + const struct flow_mod_requester *, + struct rule *old_rule, struct rule *new_rule, + struct ovs_list *dead_cookies) + OVS_REQUIRES(ofproto_mutex); +static void delete_flows__(struct rule_collection *, enum ofp_flow_removed_reason, const struct flow_mod_requester *) OVS_REQUIRES(ofproto_mutex); @@ -474,6 +486,14 @@ ofproto_enumerate_names(const char *type, struct sset *names) return class ? class->enumerate_names(type, names) : EAFNOSUPPORT; } +static void +ofproto_bump_tables_version(struct ofproto *ofproto) +{ + ++ofproto->tables_version; + ofproto->ofproto_class->set_tables_version(ofproto, + ofproto->tables_version); +} + int ofproto_create(const char *datapath_name, const char *datapath_type, struct ofproto **ofprotop) @@ -579,8 +599,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, * sizeof(struct meter *)); /* Set the initial tables version. 
*/ - ofproto->ofproto_class->set_tables_version(ofproto, - ofproto->tables_version); + ofproto_bump_tables_version(ofproto); *ofprotop = ofproto; return 0; @@ -1447,9 +1466,19 @@ ofproto_rule_delete(struct ofproto *ofproto, struct rule *rule) * switch is being deleted and any OpenFlow channels have been or soon will * be killed. */ ovs_mutex_lock(&ofproto_mutex); - oftable_remove_rule(rule); - ofproto->ofproto_class->rule_delete(rule); - ofproto_rule_unref(rule); + + if (!rule->removed) { + /* Make sure there is no postponed removal of the rule. */ + ovs_assert(cls_rule_visible_in_version(&rule->cr, CLS_MAX_VERSION)); + + if (!classifier_remove(&rule->ofproto->tables[rule->table_id].cls, + &rule->cr)) { + OVS_NOT_REACHED(); + } + ofproto_rule_remove__(rule->ofproto, rule); + ofproto->ofproto_class->rule_delete(rule); + ofproto_rule_unref(rule); + } ovs_mutex_unlock(&ofproto_mutex); } @@ -1485,7 +1514,6 @@ ofproto_flush__(struct ofproto *ofproto) rule_collection_add(&rules, rule); } delete_flows__(&rules, OFPRR_DELETE, NULL); - rule_collection_destroy(&rules); } /* XXX: Concurrent handler threads may insert new learned flows based on * learn actions of the now deleted flows right after we release @@ -1537,6 +1565,16 @@ ofproto_destroy__(struct ofproto *ofproto) ofproto->ofproto_class->dealloc(ofproto); } +/* Destroying rules is doubly deferred, must have 'ofproto' around for them. + * - 1st we defer the removal of the rules from the classifier + * - 2nd we defer the actual destruction of the rules. */ +static void +ofproto_destroy_defer__(struct ofproto *ofproto) + OVS_EXCLUDED(ofproto_mutex) +{ + ovsrcu_postpone(ofproto_destroy__, ofproto); +} + void ofproto_destroy(struct ofproto *p) OVS_EXCLUDED(ofproto_mutex) @@ -1573,7 +1611,7 @@ ofproto_destroy(struct ofproto *p) connmgr_destroy(p->connmgr); /* Destroying rules is deferred, must have 'ofproto' around for them. 
*/ - ovsrcu_postpone(ofproto_destroy__, p); + ovsrcu_postpone(ofproto_destroy_defer__, p); } /* Destroys the datapath with the respective 'name' and 'type'. With the Linux @@ -2088,8 +2126,8 @@ ofproto_flow_mod(struct ofproto *ofproto, struct ofputil_flow_mod *fm) bool done = false; rule = rule_from_cls_rule(classifier_find_match_exactly( - &table->cls, &fm->match, - fm->priority, CLS_MAX_VERSION)); + &table->cls, &fm->match, fm->priority, + CLS_MAX_VERSION)); if (rule) { /* Reading many of the rule fields and writing on 'modified' * requires the rule->mutex. Also, rule->actions may change @@ -2135,9 +2173,8 @@ ofproto_delete_flow(struct ofproto *ofproto, /* First do a cheap check whether the rule we're looking for has already * been deleted. If so, then we're done. */ - rule = rule_from_cls_rule(classifier_find_match_exactly(cls, target, - priority, - CLS_MAX_VERSION)); + rule = rule_from_cls_rule(classifier_find_match_exactly( + cls, target, priority, CLS_MAX_VERSION)); if (!rule) { return; } @@ -2721,62 +2758,6 @@ ofproto_rule_destroy__(struct rule *rule) rule->ofproto->ofproto_class->rule_dealloc(rule); } -/* Create a new rule based on attributes in 'fm', match in 'cr', and - * 'table_id'. Note that the rule is NOT inserted into a any data structures - * yet. Takes ownership of 'cr'. */ -static enum ofperr -ofproto_rule_create(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct cls_rule *cr, uint8_t table_id, - struct rule **rulep) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule *rule; - enum ofperr error; - - /* Allocate new rule. */ - rule = ofproto->ofproto_class->rule_alloc(); - if (!rule) { - cls_rule_destroy(cr); - VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); - return OFPERR_OFPFMFC_UNKNOWN; - } - - /* Initialize base state. 
*/ - *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; - cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr); - ovs_refcount_init(&rule->ref_count); - rule->flow_cookie = fm->new_cookie; - rule->created = rule->modified = time_msec(); - - ovs_mutex_init(&rule->mutex); - ovs_mutex_lock(&rule->mutex); - rule->idle_timeout = fm->idle_timeout; - rule->hard_timeout = fm->hard_timeout; - rule->importance = fm->importance; - ovs_mutex_unlock(&rule->mutex); - - *CONST_CAST(uint8_t *, &rule->table_id) = table_id; - rule->flags = fm->flags & OFPUTIL_FF_STATE; - ovsrcu_set_hidden(&rule->actions, - rule_actions_create(fm->ofpacts, fm->ofpacts_len)); - list_init(&rule->meter_list_node); - rule->eviction_group = NULL; - list_init(&rule->expirable); - rule->monitor_flags = 0; - rule->add_seqno = 0; - rule->modify_seqno = 0; - - /* Construct rule, initializing derived state. */ - error = ofproto->ofproto_class->rule_construct(rule); - if (error) { - ofproto_rule_destroy__(rule); - return error; - } - - *rulep = rule; - return 0; -} - static void rule_destroy_cb(struct rule *rule) { @@ -2815,6 +2796,70 @@ ofproto_rule_unref(struct rule *rule) } } +static void +remove_rule_rcu__(struct rule *rule) + OVS_REQUIRES(ofproto_mutex) +{ + struct ofproto *ofproto = rule->ofproto; + struct oftable *table = &ofproto->tables[rule->table_id]; + + ovs_assert(!cls_rule_visible_in_version(&rule->cr, CLS_MAX_VERSION)); + if (!classifier_remove(&table->cls, &rule->cr)) { + OVS_NOT_REACHED(); + } + ofproto->ofproto_class->rule_delete(rule); + ofproto_rule_unref(rule); +} + +static void +remove_rule_rcu(struct rule *rule) + OVS_EXCLUDED(ofproto_mutex) +{ + ovs_mutex_lock(&ofproto_mutex); + remove_rule_rcu__(rule); + ovs_mutex_unlock(&ofproto_mutex); +} + +/* Removes and deletes rules from a NULL-terminated array of rule pointers. 
*/ +static void +remove_rules_rcu(struct rule **rules) + OVS_EXCLUDED(ofproto_mutex) +{ + struct rule **orig_rules = rules; + + if (*rules) { + struct ofproto *ofproto = rules[0]->ofproto; + unsigned long tables[BITMAP_N_LONGS(256)]; + struct rule *rule; + size_t table_id; + + memset(tables, 0, sizeof tables); + + ovs_mutex_lock(&ofproto_mutex); + while ((rule = *rules++)) { + /* Defer once for each new table. This defers the subtable cleanup + * until later, so that when removing large number of flows the + * operation is faster. */ + if (!bitmap_is_set(tables, rule->table_id)) { + struct classifier *cls = &ofproto->tables[rule->table_id].cls; + + bitmap_set1(tables, rule->table_id); + classifier_defer(cls); + } + remove_rule_rcu__(rule); + } + + BITMAP_FOR_EACH_1(table_id, 256, tables) { + struct classifier *cls = &ofproto->tables[table_id].cls; + + classifier_publish(cls); + } + ovs_mutex_unlock(&ofproto_mutex); + } + + free(orig_rules); +} + void ofproto_group_ref(struct ofgroup *group) { @@ -3050,9 +3095,8 @@ learned_cookies_flush(struct ofproto *ofproto, struct ovs_list *dead_cookies) c->cookie, OVS_BE64_MAX, OFPP_ANY, OFPG_ANY); rule_criteria_require_rw(&criteria, false); collect_rules_loose(ofproto, &criteria, &rules); - delete_flows__(&rules, OFPRR_DELETE, NULL); rule_criteria_destroy(&criteria); - rule_collection_destroy(&rules); + delete_flows__(&rules, OFPRR_DELETE, NULL); free(c); } @@ -3778,6 +3822,25 @@ rule_collection_unref(struct rule_collection *rules) } } +/* Returns a NULL-terminated array of rule pointers, + * destroys 'rules'. 
*/ +static struct rule ** +rule_collection_detach(struct rule_collection *rules) +{ + struct rule **rule_array; + + rule_collection_add(rules, NULL); + + if (rules->rules == rules->stub) { + rules->rules = xmemdup(rules->rules, rules->n * sizeof *rules->rules); + } + + rule_array = rules->rules; + rule_collection_init(rules); + + return rule_array; +} + void rule_collection_destroy(struct rule_collection *rules) { @@ -3789,6 +3852,20 @@ rule_collection_destroy(struct rule_collection *rules) rule_collection_init(rules); } +/* Schedules postponed removal of rules, destroys 'rules'. */ +static void +rule_collection_remove_postponed(struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + if (rules->n > 0) { + if (rules->n == 1) { + ovsrcu_postpone(remove_rule_rcu, rules->rules[0]); + } else { + ovsrcu_postpone(remove_rules_rcu, rule_collection_detach(rules)); + } + } +} + /* Checks whether 'rule' matches 'c' and, if so, adds it to 'rules'. This * function verifies most of the criteria in 'c' itself, but the caller must * check 'c->cr' itself. @@ -4301,7 +4378,6 @@ evict_rules_from_table(struct oftable *table, unsigned int extra_space) } } delete_flows__(&rules, OFPRR_EVICTION, NULL); - rule_collection_destroy(&rules); return error; } @@ -4344,16 +4420,6 @@ get_conjunctions(const struct ofputil_flow_mod *fm, *n_conjsp = n_conjs; } -static void -set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, - size_t n_conjs) - OVS_REQUIRES(ofproto_mutex) -{ - struct cls_rule *cr = CONST_CAST(struct cls_rule *, &rule->cr); - - cls_rule_set_conjunctions(cr, conjs, n_conjs); -} - /* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT * in which no matching flow already exists in the flow table. * @@ -4367,14 +4433,16 @@ set_conjunctions(struct rule *rule, const struct cls_conjunction *conjs, * The caller retains ownership of 'fm->ofpacts'. 
*/ static enum ofperr add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct rule **rulep, bool *modify) + struct rule **old_rule, struct rule **new_rule) OVS_REQUIRES(ofproto_mutex) { struct oftable *table; struct cls_rule cr; struct rule *rule; uint8_t table_id; - enum ofperr error = 0; + struct cls_conjunction *conjs; + size_t n_conjs; + enum ofperr error; if (!check_table_id(ofproto, fm->table_id)) { error = OFPERR_OFPBRC_BAD_TABLE_ID; @@ -4413,26 +4481,13 @@ add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return OFPERR_OFPBRC_EPERM; } - cls_rule_init(&cr, &fm->match, fm->priority, CLS_MIN_VERSION); + cls_rule_init(&cr, &fm->match, fm->priority, ofproto->tables_version + 1); /* Check for the existence of an identical rule. * This will not return rules earlier marked for removal. */ rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); - if (rule) { - /* Transform "add" into "modify" of an existing identical flow. */ - cls_rule_destroy(&cr); - - fm->modify_cookie = true; - error = modify_flow_check__(ofproto, fm, rule); - if (error) { - return error; - } - - *modify = true; - } else { /* New rule. */ - struct cls_conjunction *conjs; - size_t n_conjs; - + *old_rule = rule; + if (!rule) { /* Check for overlap, if requested. */ if (fm->flags & OFPUTIL_FF_CHECK_OVERLAP && classifier_rule_overlaps(&table->cls, &cr)) { @@ -4446,82 +4501,52 @@ add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, cls_rule_destroy(&cr); return error; } - - /* Allocate new rule. */ - error = ofproto_rule_create(ofproto, fm, &cr, table - ofproto->tables, - &rule); - if (error) { - return error; - } - - /* Insert flow to the classifier, so that later flow_mods may relate - * to it. This is reversible, in case later errors require this to - * be reverted. */ - ofproto_rule_insert__(ofproto, rule); - /* Make the new rule invisible for classifier lookups. 
*/ - classifier_defer(&table->cls); - get_conjunctions(fm, &conjs, &n_conjs); - classifier_insert(&table->cls, &rule->cr, conjs, n_conjs); - free(conjs); - - error = ofproto->ofproto_class->rule_insert(rule); - if (error) { - oftable_remove_rule(rule); - ofproto_rule_unref(rule); - return error; - } - - *modify = false; + } else { + fm->modify_cookie = true; } - *rulep = rule; + /* Allocate new rule. */ + error = replace_rule_create(ofproto, fm, &cr, table - ofproto->tables, + rule, new_rule); + if (error) { + return error; + } + + get_conjunctions(fm, &conjs, &n_conjs); + replace_rule_start(ofproto, rule, *new_rule, conjs, n_conjs); + free(conjs); + return 0; } /* Revert the effects of add_flow_start(). - * 'new_rule' must be passed in as NULL, if no new rule was allocated and - * inserted to the classifier. - * Note: evictions cannot be reverted. */ + * XXX: evictions cannot be reverted. */ static void -add_flow_revert(struct ofproto *ofproto, struct rule *new_rule) +add_flow_revert(struct ofproto *ofproto, struct rule *old_rule, + struct rule *new_rule) OVS_REQUIRES(ofproto_mutex) { - /* Old rule was not changed yet, only need to revert a new rule. */ - if (new_rule) { - struct oftable *table = &ofproto->tables[new_rule->table_id]; - - if (!classifier_remove(&table->cls, &new_rule->cr)) { - OVS_NOT_REACHED(); - } - classifier_publish(&table->cls); - - ofproto_rule_remove__(ofproto, new_rule); - ofproto->ofproto_class->rule_delete(new_rule); - ofproto_rule_unref(new_rule); - } + replace_rule_revert(ofproto, old_rule, new_rule); } +/* To be called after version bump. 
*/ static void add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, - struct rule *rule, bool modify) + struct rule *old_rule, struct rule *new_rule) OVS_REQUIRES(ofproto_mutex) { - if (modify) { - struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - modify_flow__(ofproto, fm, req, rule, &dead_cookies); - learned_cookies_flush(ofproto, &dead_cookies); + replace_rule_finish(ofproto, fm, req, old_rule, new_rule, &dead_cookies); + learned_cookies_flush(ofproto, &dead_cookies); + + if (old_rule) { + ovsrcu_postpone(remove_rule_rcu, old_rule); } else { - struct oftable *table = &ofproto->tables[rule->table_id]; - - classifier_publish(&table->cls); - - learned_cookies_inc(ofproto, rule_get_actions(rule)); - - if (minimask_get_vid_mask(&rule->cr.match.mask) == VLAN_VID_MASK) { + if (minimask_get_vid_mask(&new_rule->cr.match.mask) == VLAN_VID_MASK) { if (ofproto->vlan_bitmap) { - uint16_t vid = miniflow_get_vid(&rule->cr.match.flow); + uint16_t vid = miniflow_get_vid(&new_rule->cr.match.flow); if (!bitmap_is_set(ofproto->vlan_bitmap, vid)) { bitmap_set1(ofproto->vlan_bitmap, vid); @@ -4532,208 +4557,237 @@ add_flow_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } } - ofmonitor_report(ofproto->connmgr, rule, NXFME_ADDED, 0, + ofmonitor_report(ofproto->connmgr, new_rule, NXFME_ADDED, 0, req ? req->ofconn : NULL, req ? req->request->xid : 0, NULL); } - send_buffered_packet(req, fm->buffer_id, rule); + send_buffered_packet(req, fm->buffer_id, new_rule); } /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ -/* Checks if the 'rule' can be modified to match 'fm'. - * - * Returns 0 on success, otherwise an OpenFlow error code. */ +/* Create a new rule based on attributes in 'fm', match in 'cr', 'table_id', + * and 'old_rule'. Note that the rule is NOT inserted into any data + * structures yet. Takes ownership of 'cr'.
*/ static enum ofperr -modify_flow_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct rule *rule) - OVS_REQUIRES(ofproto_mutex) -{ - if (ofproto->ofproto_class->rule_premodify_actions) { - return ofproto->ofproto_class->rule_premodify_actions( - rule, fm->ofpacts, fm->ofpacts_len); - } - return 0; -} - -/* Checks if the rules listed in 'rules' can have their actions changed to - * match those in 'fm'. - * - * Returns 0 on success, otherwise an OpenFlow error code. */ -static enum ofperr -modify_flows_check__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct rule_collection *rules) - OVS_REQUIRES(ofproto_mutex) +replace_rule_create(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct cls_rule *cr, uint8_t table_id, + struct rule *old_rule, struct rule **new_rule) { + struct rule *rule; enum ofperr error; - size_t i; - if (ofproto->ofproto_class->rule_premodify_actions) { - for (i = 0; i < rules->n; i++) { - error = modify_flow_check__(ofproto, fm, rules->rules[i]); - if (error) { - return error; - } - } + /* Allocate new rule. */ + rule = ofproto->ofproto_class->rule_alloc(); + if (!rule) { + cls_rule_destroy(cr); + VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); + return OFPERR_OFPFMFC_UNKNOWN; } - return 0; -} - -/* Modifies the 'rule', changing it to match 'fm'. */ -static void -modify_flow__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req, struct rule *rule, - struct ovs_list *dead_cookies) - OVS_REQUIRES(ofproto_mutex) -{ - enum nx_flow_update_event event = fm->command == OFPFC_ADD - ? 
NXFME_ADDED : NXFME_MODIFIED; - - /* 'fm' says that */ - bool change_cookie = (fm->modify_cookie - && fm->new_cookie != OVS_BE64_MAX - && fm->new_cookie != rule->flow_cookie); - - const struct rule_actions *actions = rule_get_actions(rule); - bool change_actions = !ofpacts_equal(fm->ofpacts, fm->ofpacts_len, - actions->ofpacts, - actions->ofpacts_len); - - bool reset_counters = (fm->flags & OFPUTIL_FF_RESET_COUNTS) != 0; - - long long int now = time_msec(); - - if (change_cookie) { - cookies_remove(ofproto, rule); - } + /* Initialize base state. */ + *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; + cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr); + ovs_refcount_init(&rule->ref_count); + rule->flow_cookie = fm->new_cookie; + rule->created = rule->modified = time_msec(); + ovs_mutex_init(&rule->mutex); ovs_mutex_lock(&rule->mutex); - if (fm->command == OFPFC_ADD) { - rule->idle_timeout = fm->idle_timeout; - rule->hard_timeout = fm->hard_timeout; - rule->importance = fm->importance; - rule->flags = fm->flags & OFPUTIL_FF_STATE; - rule->created = now; + rule->idle_timeout = fm->idle_timeout; + rule->hard_timeout = fm->hard_timeout; + rule->importance = fm->importance; + + *CONST_CAST(uint8_t *, &rule->table_id) = table_id; + rule->flags = fm->flags & OFPUTIL_FF_STATE; + *CONST_CAST(const struct rule_actions **, &rule->actions) + = rule_actions_create(fm->ofpacts, fm->ofpacts_len); + list_init(&rule->meter_list_node); + rule->eviction_group = NULL; + list_init(&rule->expirable); + rule->monitor_flags = 0; + rule->add_seqno = 0; + rule->modify_seqno = 0; + + /* Copy values from old rule for modify semantics. 
*/ + if (old_rule) { + /* 'fm' says that */ + bool change_cookie = (fm->modify_cookie + && fm->new_cookie != OVS_BE64_MAX + && fm->new_cookie != old_rule->flow_cookie); + + ovs_mutex_lock(&old_rule->mutex); + if (fm->command != OFPFC_ADD) { + rule->idle_timeout = old_rule->idle_timeout; + rule->hard_timeout = old_rule->hard_timeout; + rule->importance = old_rule->importance; + rule->flags = old_rule->flags; + rule->created = old_rule->created; + } + if (!change_cookie) { + rule->flow_cookie = old_rule->flow_cookie; + } + ovs_mutex_unlock(&old_rule->mutex); } - if (change_cookie) { - rule->flow_cookie = fm->new_cookie; - } - rule->modified = now; ovs_mutex_unlock(&rule->mutex); - if (change_cookie) { - cookies_insert(ofproto, rule); - } - if (fm->command == OFPFC_ADD) { - if (fm->idle_timeout || fm->hard_timeout || fm->importance) { - if (!rule->eviction_group) { - eviction_group_add_rule(rule); - } - } else { - eviction_group_remove_rule(rule); - } + /* Construct rule, initializing derived state. */ + error = ofproto->ofproto_class->rule_construct(rule); + if (error) { + ofproto_rule_destroy__(rule); + return error; } - if (change_actions) { - /* We have to change the actions. The rule's conjunctive match set - * is a function of its actions, so we need to update that too. The - * conjunctive match set is used in the lookup process to figure - * which (if any) collection of conjunctive sets the packet matches - * with. However, a rule with conjunction actions is never to be - * returned as a classifier lookup result. To make sure a rule with - * conjunction actions is not returned as a lookup result, we update - * them in a carefully chosen order: - * - * - If we're adding a conjunctive match set where there wasn't one - * before, we have to make the conjunctive match set available to - * lookups before the rule's actions are changed, as otherwise - * rule with a conjunction action could be returned as a lookup - * result. 
- * - * - To clear some nonempty conjunctive set, we set the rule's - * actions first, so that a lookup can't return a rule with - * conjunction actions. - * - * - Otherwise, order doesn't matter for changing one nonempty - * conjunctive match set to some other nonempty set, since the - * rule's actions are not seen by the classifier, and hence don't - * matter either before or after the change. */ - struct cls_conjunction *conjs; - size_t n_conjs; - get_conjunctions(fm, &conjs, &n_conjs); + rule->removed = true; /* Not yet in ofproto data structures. */ - if (n_conjs) { - set_conjunctions(rule, conjs, n_conjs); - } - ovsrcu_set(&rule->actions, rule_actions_create(fm->ofpacts, - fm->ofpacts_len)); - if (!conjs) { - set_conjunctions(rule, conjs, n_conjs); - } - - free(conjs); - } - - if (change_actions || reset_counters) { - ofproto->ofproto_class->rule_modify_actions(rule, reset_counters); - } - - if (event != NXFME_MODIFIED || change_actions || change_cookie) { - ofmonitor_report(ofproto->connmgr, rule, event, 0, - req ? req->ofconn : NULL, req ? req->request->xid : 0, - change_actions ? actions : NULL); - } - - if (change_actions) { - learned_cookies_inc(ofproto, rule_get_actions(rule)); - learned_cookies_dec(ofproto, actions, dead_cookies); - rule_actions_destroy(actions); - } + *new_rule = rule; + return 0; } -/* Modifies the rules listed in 'rules', changing their actions to match those - * in 'fm'. - * - * 'req' is used to retrieve the packet buffer specified in fm->buffer_id, - * if any. */ static void -modify_flows__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - const struct flow_mod_requester *req, - const struct rule_collection *rules) +replace_rule_start(struct ofproto *ofproto, + struct rule *old_rule, struct rule *new_rule, + struct cls_conjunction *conjs, size_t n_conjs) +{ + struct oftable *table = &ofproto->tables[new_rule->table_id]; + + if (old_rule) { + /* Mark the old rule for removal in the next version. 
*/ + cls_rule_make_invisible_in_version(&old_rule->cr, + ofproto->tables_version + 1, + ofproto->tables_version); + } + /* Insert flow to the classifier, so that later flow_mods may relate + * to it. This is reversible, in case later errors require this to + * be reverted. */ + ofproto_rule_insert__(ofproto, new_rule); + /* Make the new rule visible for classifier lookups only from the next + * version. */ + classifier_insert(&table->cls, &new_rule->cr, conjs, n_conjs); +} + +static void replace_rule_revert(struct ofproto *ofproto, + struct rule *old_rule, struct rule *new_rule) +{ + struct oftable *table = &ofproto->tables[new_rule->table_id]; + + /* Restore the original visibility of the old rule. */ + if (old_rule) { + cls_rule_restore_visibility(&old_rule->cr); + } + + /* Remove the new rule immediately. It was never visible to lookups. */ + if (!classifier_remove(&table->cls, &new_rule->cr)) { + OVS_NOT_REACHED(); + } + ofproto_rule_remove__(ofproto, new_rule); + /* The rule was not inserted to the ofproto provider, so we can + * release it without deleting it from the ofproto provider. */ + ofproto_rule_unref(new_rule); +} + +/* Adds the 'new_rule', replacing the 'old_rule'. */ +static void +replace_rule_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + const struct flow_mod_requester *req, + struct rule *old_rule, struct rule *new_rule, + struct ovs_list *dead_cookies) OVS_REQUIRES(ofproto_mutex) { - struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); - size_t i; + bool forward_stats = !(fm->flags & OFPUTIL_FF_RESET_COUNTS); - for (i = 0; i < rules->n; i++) { - modify_flow__(ofproto, fm, req, rules->rules[i], &dead_cookies); + /* Insert the new flow to the ofproto provider. A non-NULL 'old_rule' is a + * duplicate rule the 'new_rule' is replacing. The provider should link + * the stats from the old rule to the new one if 'forward_stats' is + * 'true'. The 'old_rule' will be deleted right after this call. 
*/ + ofproto->ofproto_class->rule_insert(new_rule, old_rule, forward_stats); + learned_cookies_inc(ofproto, rule_get_actions(new_rule)); + + if (old_rule) { + const struct rule_actions *old_actions = rule_get_actions(old_rule); + + enum nx_flow_update_event event = fm->command == OFPFC_ADD + ? NXFME_ADDED : NXFME_MODIFIED; + + bool change_cookie = (fm->modify_cookie + && fm->new_cookie != OVS_BE64_MAX + && fm->new_cookie != old_rule->flow_cookie); + + bool change_actions = !ofpacts_equal(fm->ofpacts, + fm->ofpacts_len, + old_actions->ofpacts, + old_actions->ofpacts_len); + + /* Remove the old rule from data structures. Removal from the + * classifier and the deletion of the rule is RCU postponed by the + * caller. */ + ofproto_rule_remove__(ofproto, old_rule); + learned_cookies_dec(ofproto, old_actions, dead_cookies); + + if (event != NXFME_MODIFIED || change_actions || change_cookie) { + ofmonitor_report(ofproto->connmgr, new_rule, event, 0, + req ? req->ofconn : NULL, + req ? req->request->xid : 0, + change_actions ? old_actions : NULL); + } } - learned_cookies_flush(ofproto, &dead_cookies); } static enum ofperr modify_flows_start__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct rule_collection *rules) + struct rule_collection *old_rules, + struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { enum ofperr error; - if (rules->n > 0) { - error = modify_flows_check__(ofproto, fm, rules); + rule_collection_init(new_rules); + + if (old_rules->n > 0) { + struct cls_conjunction *conjs; + size_t n_conjs; + size_t i; + + /* Create a new 'modified' rule for each old rule. 
*/ + for (i = 0; i < old_rules->n; i++) { + struct rule *old_rule = old_rules->rules[i]; + struct rule *new_rule; + struct cls_rule cr; + + cls_rule_clone_in_version(&cr, &old_rule->cr, + ofproto->tables_version + 1); + error = replace_rule_create(ofproto, fm, &cr, old_rule->table_id, + old_rule, &new_rule); + if (!error) { + rule_collection_add(new_rules, new_rule); + } else { + rule_collection_unref(new_rules); + rule_collection_destroy(new_rules); + return error; + } + } + ovs_assert(new_rules->n == old_rules->n); + + get_conjunctions(fm, &conjs, &n_conjs); + for (i = 0; i < old_rules->n; i++) { + replace_rule_start(ofproto, old_rules->rules[i], + new_rules->rules[i], conjs, n_conjs); + } + free(conjs); } else if (!(fm->cookie_mask != htonll(0) || fm->new_cookie == OVS_BE64_MAX)) { - bool modify; - - error = add_flow_start(ofproto, fm, &rules->rules[0], &modify); + /* No match, add a new flow. */ + error = add_flow_start(ofproto, fm, &old_rules->rules[0], + &new_rules->rules[0]); if (!error) { - ovs_assert(!modify); + ovs_assert(!old_rules->rules[0]); } + new_rules->n = 1; } else { - rules->rules[0] = NULL; error = 0; } + return error; } @@ -4744,7 +4798,8 @@ modify_flows_start__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, * if any. 
*/ static enum ofperr modify_flows_start_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct rule_collection *rules) + struct rule_collection *old_rules, + struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; @@ -4754,50 +4809,70 @@ modify_flows_start_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, fm->cookie, fm->cookie_mask, OFPP_ANY, OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_loose(ofproto, &criteria, rules); + error = collect_rules_loose(ofproto, &criteria, old_rules); rule_criteria_destroy(&criteria); if (!error) { - error = modify_flows_start__(ofproto, fm, rules); + error = modify_flows_start__(ofproto, fm, old_rules, new_rules); } if (error) { - rule_collection_destroy(rules); + rule_collection_destroy(old_rules); } return error; } static void -modify_flows_revert(struct ofproto *ofproto, struct rule_collection *rules) +modify_flows_revert(struct ofproto *ofproto, struct rule_collection *old_rules, + struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { - /* Old rules were not changed yet, only need to revert a new rule. */ - if (rules->n == 0 && rules->rules[0] != NULL) { - add_flow_revert(ofproto, rules->rules[0]); + /* Old rules were not changed yet, only need to revert new rules. 
*/ + if (old_rules->n == 0 && new_rules->n == 1) { + add_flow_revert(ofproto, new_rules->rules[0], NULL); + } else if (old_rules->n > 0) { + for (size_t i = 0; i < old_rules->n; i++) { + replace_rule_revert(ofproto, old_rules->rules[i], + new_rules->rules[i]); + } + rule_collection_destroy(new_rules); + rule_collection_destroy(old_rules); } - rule_collection_destroy(rules); } static void modify_flows_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, - struct rule_collection *rules) + struct rule_collection *old_rules, + struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { - if (rules->n > 0) { - modify_flows__(ofproto, fm, req, rules); - send_buffered_packet(req, fm->buffer_id, rules->rules[0]); - } else if (rules->rules[0] != NULL) { - add_flow_finish(ofproto, fm, req, rules->rules[0], false); + if (old_rules->n == 0 && new_rules->n == 1) { + add_flow_finish(ofproto, fm, req, old_rules->rules[0], + new_rules->rules[0]); + } else if (old_rules->n > 0) { + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); + + ovs_assert(new_rules->n == old_rules->n); + + for (size_t i = 0; i < old_rules->n; i++) { + replace_rule_finish(ofproto, fm, req, old_rules->rules[i], + new_rules->rules[i], &dead_cookies); + } + learned_cookies_flush(ofproto, &dead_cookies); + rule_collection_remove_postponed(old_rules); + + send_buffered_packet(req, fm->buffer_id, new_rules->rules[0]); + rule_collection_destroy(new_rules); } - rule_collection_destroy(rules); } /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error * code on failure. 
*/ static enum ofperr modify_flow_start_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, - struct rule_collection *rules) + struct rule_collection *old_rules, + struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; @@ -4808,68 +4883,76 @@ modify_flow_start_strict(struct ofproto *ofproto, struct ofputil_flow_mod *fm, OFPG11_ANY); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); - error = collect_rules_strict(ofproto, &criteria, rules); + error = collect_rules_strict(ofproto, &criteria, old_rules); rule_criteria_destroy(&criteria); if (!error) { /* collect_rules_strict() can return max 1 rule. */ - error = modify_flows_start__(ofproto, fm, rules); + error = modify_flows_start__(ofproto, fm, old_rules, new_rules); } if (error) { - rule_collection_destroy(rules); + rule_collection_destroy(old_rules); } return error; } /* OFPFC_DELETE implementation. */ -/* Deletes the rules listed in 'rules'. */ static void -delete_flows__(const struct rule_collection *rules, +delete_flows_start__(struct ofproto *ofproto, + const struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + for (size_t i = 0; i < rules->n; i++) { + cls_rule_make_invisible_in_version(&rules->rules[i]->cr, + ofproto->tables_version + 1, + ofproto->tables_version); + } +} + +static void +delete_flows_finish__(struct ofproto *ofproto, + struct rule_collection *rules, + enum ofp_flow_removed_reason reason, + const struct flow_mod_requester *req) + OVS_REQUIRES(ofproto_mutex) +{ + if (rules->n) { + struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); + + for (size_t i = 0; i < rules->n; i++) { + struct rule *rule = rules->rules[i]; + + ofproto_rule_send_removed(rule, reason); + ofmonitor_report(ofproto->connmgr, rule, NXFME_DELETED, reason, + req ? req->ofconn : NULL, + req ? 
req->request->xid : 0, NULL); + ofproto_rule_remove__(ofproto, rule); + learned_cookies_dec(ofproto, rule_get_actions(rule), + &dead_cookies); + } + rule_collection_remove_postponed(rules); + + learned_cookies_flush(ofproto, &dead_cookies); + } +} + +/* Deletes the rules listed in 'rules'. + * The deleted rules will become invisible to the lookups in the next version. + * Destroys 'rules'. */ +static void +delete_flows__(struct rule_collection *rules, enum ofp_flow_removed_reason reason, const struct flow_mod_requester *req) OVS_REQUIRES(ofproto_mutex) { if (rules->n) { - struct ovs_list dead_cookies = OVS_LIST_INITIALIZER(&dead_cookies); struct ofproto *ofproto = rules->rules[0]->ofproto; - struct rule *rule, *next; - uint8_t prev_table = UINT8_MAX; - size_t i; - for (i = 0, next = rules->rules[0]; - rule = next, next = (++i < rules->n) ? rules->rules[i] : NULL, - rule; prev_table = rule->table_id) { - struct classifier *cls = &ofproto->tables[rule->table_id].cls; - uint8_t next_table = next ? next->table_id : UINT8_MAX; - - ofproto_rule_send_removed(rule, reason); - - ofmonitor_report(ofproto->connmgr, rule, NXFME_DELETED, reason, - req ? req->ofconn : NULL, - req ? req->request->xid : 0, NULL); - - /* Defer once for each new table. 
*/ - if (rule->table_id != prev_table) { - classifier_defer(cls); - } - if (!classifier_remove(cls, &rule->cr)) { - OVS_NOT_REACHED(); - } - if (next_table != rule->table_id) { - classifier_publish(cls); - } - ofproto_rule_remove__(ofproto, rule); - - ofproto->ofproto_class->rule_delete(rule); - - learned_cookies_dec(ofproto, rule_get_actions(rule), - &dead_cookies); - - ofproto_rule_unref(rule); - } - learned_cookies_flush(ofproto, &dead_cookies); + delete_flows_start__(ofproto, rules); + ofproto_bump_tables_version(ofproto); + delete_flows_finish__(ofproto, rules, reason, req); ofmonitor_flush(ofproto->connmgr); } } @@ -4885,45 +4968,42 @@ delete_flows_start_loose(struct ofproto *ofproto, enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, CLS_MAX_VERSION, - fm->cookie, fm->cookie_mask, - fm->out_port, fm->out_group); + fm->cookie, fm->cookie_mask, fm->out_port, + fm->out_group); rule_criteria_require_rw(&criteria, (fm->flags & OFPUTIL_FF_NO_READONLY) != 0); error = collect_rules_loose(ofproto, &criteria, rules); rule_criteria_destroy(&criteria); if (!error) { - for (size_t i = 0; i < rules->n; i++) { - struct rule *rule = rules->rules[i]; - - cls_rule_make_invisible_in_version(CONST_CAST(struct cls_rule *, - &rule->cr), - CLS_MIN_VERSION, - CLS_MIN_VERSION); - } + delete_flows_start__(ofproto, rules); } return error; } static void -delete_flows_revert(struct rule_collection *rules) +delete_flows_revert(struct ofproto *ofproto OVS_UNUSED, + struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { for (size_t i = 0; i < rules->n; i++) { - cls_rule_restore_visibility(&rules->rules[i]->cr); + struct rule *rule = rules->rules[i]; + + /* Restore the original visibility of the rule. 
*/ + cls_rule_restore_visibility(&rule->cr); } rule_collection_destroy(rules); } static void -delete_flows_finish(const struct ofputil_flow_mod *fm, +delete_flows_finish(struct ofproto *ofproto, + const struct ofputil_flow_mod *fm, const struct flow_mod_requester *req, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { - delete_flows__(rules, fm->delete_reason, req); - rule_collection_destroy(rules); + delete_flows_finish__(ofproto, rules, fm->delete_reason, req); } /* Implements OFPFC_DELETE_STRICT. */ @@ -4945,14 +5025,7 @@ delete_flow_start_strict(struct ofproto *ofproto, rule_criteria_destroy(&criteria); if (!error) { - for (size_t i = 0; i < rules->n; i++) { - struct rule *rule = rules->rules[i]; - - cls_rule_make_invisible_in_version(CONST_CAST(struct cls_rule *, - &rule->cr), - CLS_MIN_VERSION, - CLS_MIN_VERSION); - } + delete_flows_start__(ofproto, rules); } return error; @@ -5094,6 +5167,7 @@ handle_flow_mod__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, ovs_mutex_lock(&ofproto_mutex); error = do_bundle_flow_mod_start(ofproto, fm, &be); if (!error) { + ofproto_bump_tables_version(ofproto); do_bundle_flow_mod_finish(ofproto, fm, req, &be); } ofmonitor_flush(ofproto->connmgr); @@ -5665,7 +5739,6 @@ handle_delete_meter(struct ofconn *ofconn, struct ofputil_meter_mod *mm) meter_delete(ofproto, first, last); ovs_mutex_unlock(&ofproto_mutex); - rule_collection_destroy(&rules); return error; } @@ -6464,19 +6537,19 @@ do_bundle_flow_mod_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { switch (fm->command) { case OFPFC_ADD: - return add_flow_start(ofproto, fm, &be->rule, &be->modify); - + return add_flow_start(ofproto, fm, &be->old_rules.stub[0], + &be->new_rules.stub[0]); case OFPFC_MODIFY: - return modify_flows_start_loose(ofproto, fm, &be->rules); - + return modify_flows_start_loose(ofproto, fm, &be->old_rules, + &be->new_rules); case OFPFC_MODIFY_STRICT: - return modify_flow_start_strict(ofproto, fm, &be->rules); - + return 
modify_flow_start_strict(ofproto, fm, &be->old_rules, + &be->new_rules); case OFPFC_DELETE: - return delete_flows_start_loose(ofproto, fm, &be->rules); + return delete_flows_start_loose(ofproto, fm, &be->old_rules); case OFPFC_DELETE_STRICT: - return delete_flow_start_strict(ofproto, fm, &be->rules); + return delete_flow_start_strict(ofproto, fm, &be->old_rules); } return OFPERR_OFPFMFC_BAD_COMMAND; @@ -6489,17 +6562,17 @@ do_bundle_flow_mod_revert(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { switch (fm->command) { case OFPFC_ADD: - add_flow_revert(ofproto, be->modify ? NULL : be->rule); + add_flow_revert(ofproto, be->old_rules.stub[0], be->new_rules.stub[0]); break; case OFPFC_MODIFY: case OFPFC_MODIFY_STRICT: - modify_flows_revert(ofproto, &be->rules); + modify_flows_revert(ofproto, &be->old_rules, &be->new_rules); break; case OFPFC_DELETE: case OFPFC_DELETE_STRICT: - delete_flows_revert(&be->rules); + delete_flows_revert(ofproto, &be->old_rules); break; default: @@ -6515,17 +6588,18 @@ do_bundle_flow_mod_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { switch (fm->command) { case OFPFC_ADD: - add_flow_finish(ofproto, fm, req, be->rule, be->modify); + add_flow_finish(ofproto, fm, req, be->old_rules.stub[0], + be->new_rules.stub[0]); break; case OFPFC_MODIFY: case OFPFC_MODIFY_STRICT: - modify_flows_finish(ofproto, fm, req, &be->rules); + modify_flows_finish(ofproto, fm, req, &be->old_rules, &be->new_rules); break; case OFPFC_DELETE: case OFPFC_DELETE_STRICT: - delete_flows_finish(fm, req, &be->rules); + delete_flows_finish(ofproto, fm, req, &be->old_rules); break; default: @@ -6533,6 +6607,25 @@ do_bundle_flow_mod_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } } +/* Commit phases (all while locking ofproto_mutex): + * + * 1. Begin: Gather resources and make changes visible in the next version. + * - Mark affected rules for removal in the next version. 
+ * - Create new replacement rules, make visible in the next + * version. + * - Do not send any events or notifications. + * + * 2. Revert: Fail if any errors are found. After this point no errors are + * possible. No visible changes were made, so rollback is minimal (remove + * added invisible rules, restore visibility of rules marked for removal). + * + * 3. Bump the version visible to lookups. + * + * 4. Finish: Insert replacement rules to the ofproto provider. Remove replaced + * and deleted rules from ofproto data structures, and Schedule postponed + * removal of deleted rules from the classifier. Send notifications, buffered + * packets, etc. + */ static enum ofperr do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) { @@ -6551,6 +6644,8 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) } else { error = 0; ovs_mutex_lock(&ofproto_mutex); + + /* 1. Begin. */ LIST_FOR_EACH (be, node, &bundle->msg_list) { if (be->type == OFPTYPE_PORT_MOD) { /* Not supported yet. */ @@ -6571,14 +6666,21 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) error = OFPERR_OFPBFC_MSG_FAILED; } - /* Revert all previous entires. */ + /* 2. Revert. Undo all the changes made above. */ LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) { if (be->type == OFPTYPE_FLOW_MOD) { do_bundle_flow_mod_revert(ofproto, &be->fm, be); } } } else { - /* Finish the changes. */ + /* 3. Bump the version. This makes all the changes in the bundle + * visible to the lookups at once. For this to work an upcall must + * read the tables_version once at the beginning and keep using the + * same version number for the whole duration of the upcall + * processing. */ + ofproto_bump_tables_version(ofproto); + + /* 4. Finish. 
*/ LIST_FOR_EACH (be, node, &bundle->msg_list) { if (be->type == OFPTYPE_FLOW_MOD) { struct flow_mod_requester req = { ofconn, be->ofp_msg }; @@ -6615,10 +6717,6 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) if (error) { return error; } - /* Atomic updates not supported yet. */ - if (bctrl.flags & OFPBF_ATOMIC) { - return OFPERR_OFPBFC_BAD_FLAGS; - } reply.flags = 0; reply.bundle_id = bctrl.bundle_id; @@ -7326,6 +7424,8 @@ ofproto_rule_insert__(struct ofproto *ofproto, struct rule *rule) { const struct rule_actions *actions = rule_get_actions(rule); + ovs_assert(rule->removed); + if (rule->hard_timeout || rule->idle_timeout) { list_insert(&ofproto->expirable, &rule->expirable); } @@ -7334,14 +7434,16 @@ ofproto_rule_insert__(struct ofproto *ofproto, struct rule *rule) if (actions->has_meter) { meter_insert_rule(rule); } + rule->removed = false; } -/* Removes 'rule' from the ofproto data structures AFTER caller has removed - * it from the classifier. */ +/* Removes 'rule' from the ofproto data structures. */ static void ofproto_rule_remove__(struct ofproto *ofproto, struct rule *rule) OVS_REQUIRES(ofproto_mutex) { + ovs_assert(!rule->removed); + cookies_remove(ofproto, rule); eviction_group_remove_rule(rule); @@ -7352,19 +7454,8 @@ ofproto_rule_remove__(struct ofproto *ofproto, struct rule *rule) list_remove(&rule->meter_list_node); list_init(&rule->meter_list_node); } -} -static void -oftable_remove_rule(struct rule *rule) - OVS_REQUIRES(ofproto_mutex) -{ - struct classifier *cls = &rule->ofproto->tables[rule->table_id].cls; - - if (classifier_remove(cls, &rule->cr)) { - ofproto_rule_remove__(rule->ofproto, rule); - } else { - OVS_NOT_REACHED(); - } + rule->removed = true; } /* unixctl commands. 
*/ diff --git a/tests/ofproto.at b/tests/ofproto.at index 1fa5b2d80..9c5f0bb4e 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -3526,38 +3526,38 @@ vconn|DBG|unix: received: OFPT_HELLO (OF1.4): version bitmap: 0x01, 0x05 vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=OPEN_REQUEST flags=ordered + bundle_id=0 type=OPEN_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=OPEN_REPLY flags=0 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:1 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:2 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:88:99:aa:bb:cc idle:70 actions=output:3 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:4 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): DEL table:255 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa idle:50 actions=output:5 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD 
in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:6 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:88:99:aa:bb:cc idle:70 actions=output:7 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): DEL table:255 in_port=2,dl_src=00:88:99:aa:bb:cc actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=COMMIT_REPLY flags=0 vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): @@ -3578,17 +3578,17 @@ vconn|DBG|unix: received: OFPT_HELLO (OF1.4): version bitmap: 0x01, 0x05 vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=OPEN_REQUEST flags=ordered + bundle_id=0 type=OPEN_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=OPEN_REPLY flags=0 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): MOD actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): MOD_STRICT in_port=2,dl_src=00:77:88:99:aa:bb actions=output:7 vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=COMMIT_REPLY flags=0 vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): @@ -3609,20 +3609,20 @@ vconn|DBG|unix: received: OFPT_HELLO 
(OF1.4): version bitmap: 0x01, 0x05 vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports versions 0x01, 0x05) vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=OPEN_REQUEST flags=ordered + bundle_id=0 type=OPEN_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=OPEN_REPLY flags=0 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:77:88:99:aa:bb idle:60 actions=output:8 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): DEL_STRICT table:255 in_port=2,dl_src=00:66:77:88:99:aa actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD in_port=2,dl_src=00:66:77:88:99:aa actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=COMMIT_REPLY flags=0 vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): @@ -3681,7 +3681,7 @@ OFPT_ERROR (OF1.4) (xid=0xb): OFPBRC_EPERM OFPT_FLOW_MOD (OF1.4) (xid=0xb): ADD table:254 actions=drop OFPT_ERROR (OF1.4) (xid=0xd): OFPBFC_MSG_FAILED OFPT_BUNDLE_CONTROL (OF1.4) (xid=0xd): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered ovs-ofctl: talking to unix:br0.mgmt (Protocol error) ]) diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index b7db9bb78..6c4856926 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -2843,35 +2843,35 @@ vconn|DBG|unix: received: OFPT_HELLO (OF1.4): version bitmap: 0x01, 0x05 vconn|DBG|unix: negotiated OpenFlow version 0x05 (we 
support version 0x06 and earlier, peer supports versions 0x01, 0x05) vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=OPEN_REQUEST flags=ordered + bundle_id=0 type=OPEN_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=OPEN_REPLY flags=0 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=1 importance:1 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=2 importance:2 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=3 importance:3 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=4 importance:4 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=5 importance:5 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=6 importance:6 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=7 importance:7 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=8 importance:8 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): 
OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=COMMIT_REPLY flags=0 vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): @@ -2882,23 +2882,23 @@ vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and ea vconn|DBG|unix: received: OFPST_FLOW request (OF1.4): vconn|DBG|unix: sent (Success): OFPST_FLOW reply (OF1.4): vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=OPEN_REQUEST flags=ordered + bundle_id=0 type=OPEN_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=OPEN_REPLY flags=0 vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=1 importance:11 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=3 importance:13 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=5 importance:15 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_ADD_MESSAGE (OF1.4): - bundle_id=0 flags=ordered + bundle_id=0 flags=atomic ordered OFPT_FLOW_MOD (OF1.4): ADD dl_vlan=7 importance:17 actions=drop vconn|DBG|unix: received: OFPT_BUNDLE_CONTROL (OF1.4): - bundle_id=0 type=COMMIT_REQUEST flags=ordered + bundle_id=0 type=COMMIT_REQUEST flags=atomic ordered vconn|DBG|unix: sent (Success): OFPT_BUNDLE_CONTROL (OF1.4): bundle_id=0 type=COMMIT_REPLY flags=0 vconn|DBG|unix: sent (Success): OFPT_HELLO (OF1.5): @@ -2908,14 +2908,14 @@ vconn|DBG|unix: received: OFPT_HELLO (OF1.4): vconn|DBG|unix: negotiated OpenFlow version 0x05 (we support version 0x06 and earlier, peer supports version 0x05) vconn|DBG|unix: received: OFPST_FLOW request (OF1.4): vconn|DBG|unix: sent (Success): OFPST_FLOW reply (OF1.4): - importance=11, dl_vlan=1 actions=drop importance=2, 
dl_vlan=2 actions=drop - importance=13, dl_vlan=3 actions=drop importance=4, dl_vlan=4 actions=drop - importance=15, dl_vlan=5 actions=drop importance=6, dl_vlan=6 actions=drop - importance=17, dl_vlan=7 actions=drop importance=8, dl_vlan=8 actions=drop + importance=11, dl_vlan=1 actions=drop + importance=13, dl_vlan=3 actions=drop + importance=15, dl_vlan=5 actions=drop + importance=17, dl_vlan=7 actions=drop ]) OVS_VSWITCHD_STOP diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 2c6a07332..63c2ecca8 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -298,8 +298,8 @@ These commands manage the flow table in an OpenFlow switch. In each case, \fIflow\fR specifies a flow entry in the format described in \fBFlow Syntax\fR, below, \fIfile\fR is a text file that contains zero or more flows in the same syntax, one per line, and the optional -\fB\-\-bundle\fR option operates the command as a single transation, -see option \fB\-\-bundle\fR, below. +\fB\-\-bundle\fR option operates the command as a single atomic +transaction, see option \fB\-\-bundle\fR, below. . .IP "[\fB\-\-bundle\fR] \fBadd\-flow \fIswitch flow\fR" .IQ "[\fB\-\-bundle\fR] \fBadd\-flow \fIswitch \fB\- < \fIfile\fR" @@ -2397,13 +2397,16 @@ depending on its configuration. Uses strict matching when running flow modification commands. . .IP "\fB\-\-bundle\fR" -Execute flow mods as an OpenFlow 1.4 bundle transaction. +Execute flow mods as an OpenFlow 1.4 atomic bundle transaction. .RS .IP \(bu Within a bundle, all flow mods are processed in the order they appear -and as a single transaction, meaning that if one of them fails, the -whole transaction fails and none of the changes are made to the -\fIswitch\fR's flow table. 
+and as a single atomic transaction, meaning that if one of them fails, +the whole transaction fails and none of the changes are made to the +\fIswitch\fR's flow table, and that each given datapath packet +traversing the OpenFlow tables sees the flow tables either as before +the transaction, or after all the flow mods in the bundle have been +successfully applied. .IP \(bu The beginning and the end of the flow table modification commands in a bundle are delimited with OpenFlow 1.4 bundle control messages, which @@ -2416,10 +2419,6 @@ Bundles require OpenFlow 1.4 or higher. An explicit \fB-O OpenFlow14\fR option is not needed, but you may need to enable OpenFlow 1.4 support for OVS by setting the OVSDB \fIprotocols\fR column in the \fIbridge\fR table. -.IP \(bu -Current implementation executes all bundles with the 'ordered' flag, -so that the flow mods are always executed in the order specified. -Atomic bundles are not yet supported. .RE . .so lib/ofp-version.man diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 812ce7fff..8df79b853 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -1223,7 +1223,7 @@ bundle_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, free(CONST_CAST(struct ofpact *, fm->ofpacts)); } - bundle_transact(vconn, &requests, OFPBF_ORDERED); + bundle_transact(vconn, &requests, OFPBF_ORDERED | OFPBF_ATOMIC); vconn_close(vconn); } @@ -2700,7 +2700,7 @@ ofctl_replace_flows(struct ovs_cmdl_context *ctx) } } if (bundle) { - bundle_transact(vconn, &requests, OFPBF_ORDERED); + bundle_transact(vconn, &requests, OFPBF_ORDERED | OFPBF_ATOMIC); } else { transact_multiple_noreply(vconn, &requests); } From d79e3d701072f0c6b2f7b28e39b291eb12390997 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:43 -0700 Subject: [PATCH 142/146] ofproto: Accurate flow counts. Classifier's rule count now contains temporary duplicates and rules whose deletion has been deferred. 
Maintain a new 'n_flows' count in struct oftable to serve as the count of rules in the latest version. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/connmgr.c | 2 +- ofproto/ofproto-provider.h | 5 ++++- ofproto/ofproto.c | 32 ++++++++++++++++++++++---------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 1fee86084..975ee335b 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -2027,7 +2027,7 @@ connmgr_flushed(struct connmgr *mgr) /* Returns the number of hidden rules created by the in-band and fail-open * implementations in table 0. (Subtracting this count from the number of - * rules in the table 0 classifier, as returned by classifier_count(), yields + * rules in the table 0 classifier, as maintained in struct oftable, yields * the number of flows that OVS should report via OpenFlow for table 0.) */ int connmgr_count_hidden_rules(const struct connmgr *mgr) diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 07229c598..fd66e494b 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -220,6 +220,9 @@ struct oftable { /* Maximum number of flows or UINT_MAX if there is no limit besides any * limit imposed by resource limitations. */ unsigned int max_flows; + /* Current number of flows, not counting temporary duplicates nor deferred + * deletions. */ + unsigned int n_flows; /* These members determine the handling of an attempt to add a flow that * would cause the table to have more than 'max_flows' flows. @@ -818,7 +821,7 @@ struct ofproto_class { * * - 'table_id' to the array index. * - * - 'active_count' to the classifier_count() for the table. + * - 'active_count' to the 'n_flows' of struct oftable for the table. * * - 'lookup_count' and 'matched_count' to 0. 
* diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 5242cf0e6..64aaadd72 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1698,12 +1698,11 @@ ofproto_run(struct ofproto *p) continue; } - if (classifier_count(&table->cls) > 100000) { + if (table->n_flows > 100000) { static struct vlog_rate_limit count_rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&count_rl, "Table %"PRIuSIZE" has an excessive" - " number of rules: %d", i, - classifier_count(&table->cls)); + " number of rules: %d", i, table->n_flows); } ovs_mutex_lock(&ofproto_mutex); @@ -1797,7 +1796,7 @@ ofproto_get_memory_usage(const struct ofproto *ofproto, struct simap *usage) n_rules = 0; OFPROTO_FOR_EACH_TABLE (table, ofproto) { - n_rules += classifier_count(&table->cls); + n_rules += table->n_flows; } simap_increase(usage, "rules", n_rules); @@ -3158,10 +3157,9 @@ query_tables(struct ofproto *ofproto, stats = *statsp = xcalloc(ofproto->n_tables, sizeof *stats); for (i = 0; i < ofproto->n_tables; i++) { struct ofputil_table_stats *s = &stats[i]; - struct classifier *cls = &ofproto->tables[i].cls; s->table_id = i; - s->active_count = classifier_count(cls); + s->active_count = ofproto->tables[i].n_flows; if (i == 0) { s->active_count -= connmgr_count_hidden_rules( ofproto->connmgr); @@ -4361,7 +4359,7 @@ evict_rules_from_table(struct oftable *table, unsigned int extra_space) { enum ofperr error = 0; struct rule_collection rules; - unsigned int count = classifier_count(&table->cls) + extra_space; + unsigned int count = table->n_flows + extra_space; unsigned int max_flows = table->max_flows; rule_collection_init(&rules); @@ -4657,6 +4655,8 @@ replace_rule_start(struct ofproto *ofproto, cls_rule_make_invisible_in_version(&old_rule->cr, ofproto->tables_version + 1, ofproto->tables_version); + } else { + table->n_flows++; } /* Insert flow to the classifier, so that later flow_mods may relate * to it. 
This is reversible, in case later errors require this to @@ -4672,9 +4672,12 @@ static void replace_rule_revert(struct ofproto *ofproto, { struct oftable *table = &ofproto->tables[new_rule->table_id]; - /* Restore the original visibility of the old rule. */ if (old_rule) { + /* Restore the original visibility of the old rule. */ cls_rule_restore_visibility(&old_rule->cr); + } else { + /* Restore table's rule count. */ + table->n_flows--; } /* Remove the new rule immediately. It was never visible to lookups. */ @@ -4905,7 +4908,11 @@ delete_flows_start__(struct ofproto *ofproto, OVS_REQUIRES(ofproto_mutex) { for (size_t i = 0; i < rules->n; i++) { - cls_rule_make_invisible_in_version(&rules->rules[i]->cr, + struct rule *rule = rules->rules[i]; + struct oftable *table = &ofproto->tables[rule->table_id]; + + table->n_flows--; + cls_rule_make_invisible_in_version(&rule->cr, ofproto->tables_version + 1, ofproto->tables_version); } @@ -4983,12 +4990,16 @@ delete_flows_start_loose(struct ofproto *ofproto, } static void -delete_flows_revert(struct ofproto *ofproto OVS_UNUSED, +delete_flows_revert(struct ofproto *ofproto, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { for (size_t i = 0; i < rules->n; i++) { struct rule *rule = rules->rules[i]; + struct oftable *table = &ofproto->tables[rule->table_id]; + + /* Restore table's rule count. */ + table->n_flows++; /* Restore the original visibility of the rule. */ cls_rule_restore_visibility(&rule->cr); @@ -7315,6 +7326,7 @@ oftable_init(struct oftable *table) memset(table, 0, sizeof *table); classifier_init(&table->cls, flow_segment_u64s); table->max_flows = UINT_MAX; + table->n_flows = 0; atomic_init(&table->miss_config, OFPUTIL_TABLE_MISS_DEFAULT); classifier_set_prefix_fields(&table->cls, default_prefix_fields, From 6787a49f69168e78f5d8af8aa8be128385650a50 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 15:53:43 -0700 Subject: [PATCH 143/146] ofproto: Revertible eviction. 
Handling evictions was broken in the previous patches. Eviction took place early in the commit, and actually inappropriately bumped the version number too early. Now eviction is treated much like a flow modification, where a new rule replaces the old one, but just without any 'inheritance' from the evicted rule to the new rule. This makes evictions to be executed only when commit is successful, as evictions are reverted like any other changes when the commit fails. Signed-off-by: Jarno Rajahalme Acked-by: Ben Pfaff --- ofproto/ofproto.c | 119 +++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 43 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 64aaadd72..cb8a941af 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -81,8 +81,7 @@ static void oftable_destroy(struct oftable *); static void oftable_set_name(struct oftable *, const char *name); -static enum ofperr evict_rules_from_table(struct oftable *, - unsigned int extra_space) +static enum ofperr evict_rules_from_table(struct oftable *) OVS_REQUIRES(ofproto_mutex); static void oftable_disable_eviction(struct oftable *); static void oftable_enable_eviction(struct oftable *, @@ -265,8 +264,8 @@ static void replace_rule_start(struct ofproto *, struct cls_conjunction *, size_t n_conjs) OVS_REQUIRES(ofproto_mutex); -static void replace_rule_revert(struct ofproto *, - struct rule *old_rule, struct rule *new_rule) +static void replace_rule_revert(struct ofproto *, struct rule *old_rule, + struct rule *new_rule) OVS_REQUIRES(ofproto_mutex); static void replace_rule_finish(struct ofproto *, struct ofputil_flow_mod *, @@ -1433,7 +1432,7 @@ ofproto_configure_table(struct ofproto *ofproto, int table_id, } ovs_mutex_lock(&ofproto_mutex); - evict_rules_from_table(table, 0); + evict_rules_from_table(table); ovs_mutex_unlock(&ofproto_mutex); } @@ -4354,12 +4353,12 @@ handle_queue_stats_request(struct ofconn *ofconn, } static enum ofperr -evict_rules_from_table(struct 
oftable *table, unsigned int extra_space) +evict_rules_from_table(struct oftable *table) OVS_REQUIRES(ofproto_mutex) { enum ofperr error = 0; struct rule_collection rules; - unsigned int count = table->n_flows + extra_space; + unsigned int count = table->n_flows; unsigned int max_flows = table->max_flows; rule_collection_init(&rules); @@ -4494,10 +4493,17 @@ add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } /* If necessary, evict an existing rule to clear out space. */ - error = evict_rules_from_table(table, 1); - if (error) { - cls_rule_destroy(&cr); - return error; + if (table->n_flows >= table->max_flows) { + if (!choose_rule_to_evict(table, &rule)) { + error = OFPERR_OFPFMFC_TABLE_FULL; + cls_rule_destroy(&cr); + return error; + } + eviction_group_remove_rule(rule); + /* Marks '*old_rule' as an evicted rule rather than replaced rule. + */ + fm->delete_reason = OFPRR_EVICTION; + *old_rule = rule; } } else { fm->modify_cookie = true; @@ -4517,13 +4523,17 @@ add_flow_start(struct ofproto *ofproto, struct ofputil_flow_mod *fm, return 0; } -/* Revert the effects of add_flow_start(). - * XXX: evictions cannot be reverted. */ +/* Revert the effects of add_flow_start(). */ static void -add_flow_revert(struct ofproto *ofproto, struct rule *old_rule, - struct rule *new_rule) +add_flow_revert(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule *old_rule, struct rule *new_rule) OVS_REQUIRES(ofproto_mutex) { + if (old_rule && fm->delete_reason == OFPRR_EVICTION) { + /* Revert the eviction. */ + eviction_group_add_rule(old_rule); + } + replace_rule_revert(ofproto, old_rule, new_rule); } @@ -4609,7 +4619,7 @@ replace_rule_create(struct ofproto *ofproto, struct ofputil_flow_mod *fm, rule->modify_seqno = 0; /* Copy values from old rule for modify semantics. 
*/ - if (old_rule) { + if (old_rule && fm->delete_reason != OFPRR_EVICTION) { /* 'fm' says that */ bool change_cookie = (fm->modify_cookie && fm->new_cookie != OVS_BE64_MAX @@ -4650,6 +4660,7 @@ replace_rule_start(struct ofproto *ofproto, { struct oftable *table = &ofproto->tables[new_rule->table_id]; + /* 'old_rule' may be either an evicted rule or replaced rule. */ if (old_rule) { /* Mark the old rule for removal in the next version. */ cls_rule_make_invisible_in_version(&old_rule->cr, @@ -4699,40 +4710,57 @@ replace_rule_finish(struct ofproto *ofproto, struct ofputil_flow_mod *fm, OVS_REQUIRES(ofproto_mutex) { bool forward_stats = !(fm->flags & OFPUTIL_FF_RESET_COUNTS); + struct rule *replaced_rule; - /* Insert the new flow to the ofproto provider. A non-NULL 'old_rule' is a - * duplicate rule the 'new_rule' is replacing. The provider should link - * the stats from the old rule to the new one if 'forward_stats' is - * 'true'. The 'old_rule' will be deleted right after this call. */ - ofproto->ofproto_class->rule_insert(new_rule, old_rule, forward_stats); + replaced_rule = fm->delete_reason != OFPRR_EVICTION ? old_rule : NULL; + + /* Insert the new flow to the ofproto provider. A non-NULL 'replaced_rule' + * is a duplicate rule the 'new_rule' is replacing. The provider should + * link the stats from the old rule to the new one if 'forward_stats' is + * 'true'. The 'replaced_rule' will be deleted right after this call. */ + ofproto->ofproto_class->rule_insert(new_rule, replaced_rule, + forward_stats); learned_cookies_inc(ofproto, rule_get_actions(new_rule)); if (old_rule) { const struct rule_actions *old_actions = rule_get_actions(old_rule); - enum nx_flow_update_event event = fm->command == OFPFC_ADD - ? 
NXFME_ADDED : NXFME_MODIFIED; - - bool change_cookie = (fm->modify_cookie - && fm->new_cookie != OVS_BE64_MAX - && fm->new_cookie != old_rule->flow_cookie); - - bool change_actions = !ofpacts_equal(fm->ofpacts, - fm->ofpacts_len, - old_actions->ofpacts, - old_actions->ofpacts_len); - /* Remove the old rule from data structures. Removal from the * classifier and the deletion of the rule is RCU postponed by the * caller. */ ofproto_rule_remove__(ofproto, old_rule); learned_cookies_dec(ofproto, old_actions, dead_cookies); - if (event != NXFME_MODIFIED || change_actions || change_cookie) { - ofmonitor_report(ofproto->connmgr, new_rule, event, 0, + if (replaced_rule) { + enum nx_flow_update_event event = fm->command == OFPFC_ADD + ? NXFME_ADDED : NXFME_MODIFIED; + + bool change_cookie = (fm->modify_cookie + && fm->new_cookie != OVS_BE64_MAX + && fm->new_cookie != old_rule->flow_cookie); + + bool change_actions = !ofpacts_equal(fm->ofpacts, + fm->ofpacts_len, + old_actions->ofpacts, + old_actions->ofpacts_len); + + if (event != NXFME_MODIFIED || change_actions || change_cookie) { + ofmonitor_report(ofproto->connmgr, new_rule, event, 0, + req ? req->ofconn : NULL, + req ? req->request->xid : 0, + change_actions ? old_actions : NULL); + } + } else { + /* XXX: This is slight duplication with delete_flows_finish__() */ + + /* XXX: This call should done when rule's refcount reaches + * zero to get accurate stats in the flow removed message. */ + ofproto_rule_send_removed(old_rule, OFPRR_EVICTION); + + ofmonitor_report(ofproto->connmgr, old_rule, NXFME_DELETED, + OFPRR_EVICTION, req ? req->ofconn : NULL, - req ? req->request->xid : 0, - change_actions ? old_actions : NULL); + req ? 
req->request->xid : 0, NULL); } } } @@ -4784,7 +4812,8 @@ modify_flows_start__(struct ofproto *ofproto, struct ofputil_flow_mod *fm, error = add_flow_start(ofproto, fm, &old_rules->rules[0], &new_rules->rules[0]); if (!error) { - ovs_assert(!old_rules->rules[0]); + ovs_assert(fm->delete_reason == OFPRR_EVICTION + || !old_rules->rules[0]); } new_rules->n = 1; } else { @@ -4826,13 +4855,14 @@ modify_flows_start_loose(struct ofproto *ofproto, struct ofputil_flow_mod *fm, } static void -modify_flows_revert(struct ofproto *ofproto, struct rule_collection *old_rules, +modify_flows_revert(struct ofproto *ofproto, struct ofputil_flow_mod *fm, + struct rule_collection *old_rules, struct rule_collection *new_rules) OVS_REQUIRES(ofproto_mutex) { /* Old rules were not changed yet, only need to revert new rules. */ if (old_rules->n == 0 && new_rules->n == 1) { - add_flow_revert(ofproto, new_rules->rules[0], NULL); + add_flow_revert(ofproto, fm, old_rules->rules[0], new_rules->rules[0]); } else if (old_rules->n > 0) { for (size_t i = 0; i < old_rules->n; i++) { replace_rule_revert(ofproto, old_rules->rules[i], @@ -5042,6 +5072,7 @@ delete_flow_start_strict(struct ofproto *ofproto, return error; } +/* XXX: This should be sent right when the rule refcount gets to zero! 
*/ static void ofproto_rule_send_removed(struct rule *rule, uint8_t reason) OVS_REQUIRES(ofproto_mutex) @@ -6573,12 +6604,13 @@ do_bundle_flow_mod_revert(struct ofproto *ofproto, struct ofputil_flow_mod *fm, { switch (fm->command) { case OFPFC_ADD: - add_flow_revert(ofproto, be->old_rules.stub[0], be->new_rules.stub[0]); + add_flow_revert(ofproto, fm, be->old_rules.stub[0], + be->new_rules.stub[0]); break; case OFPFC_MODIFY: case OFPFC_MODIFY_STRICT: - modify_flows_revert(ofproto, &be->old_rules, &be->new_rules); + modify_flows_revert(ofproto, fm, &be->old_rules, &be->new_rules); break; case OFPFC_DELETE: @@ -7449,7 +7481,8 @@ ofproto_rule_insert__(struct ofproto *ofproto, struct rule *rule) rule->removed = false; } -/* Removes 'rule' from the ofproto data structures. */ +/* Removes 'rule' from the ofproto data structures. Caller may have deferred + * the removal from the classifier. */ static void ofproto_rule_remove__(struct ofproto *ofproto, struct rule *rule) OVS_REQUIRES(ofproto_mutex) From e815de429c66a6c73d0605e36bf9be4eeec785ce Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Mon, 8 Jun 2015 22:57:09 -0700 Subject: [PATCH 144/146] vtep-ctl: Fix a bug. add_port_to_cache() uses 'cache_name' as the shash node name for shash_add(). So, the del_cached_port() must also pass 'cache_name' as argument for shash_find_and_delete(). This bug does not cause any issue currently but should be fixed. 
Signed-off-by: Alex Wang Acked-by: Justin Pettit --- vtep/vtep-ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vtep/vtep-ctl.c b/vtep/vtep-ctl.c index ead22ea39..a501a9825 100644 --- a/vtep/vtep-ctl.c +++ b/vtep/vtep-ctl.c @@ -798,7 +798,7 @@ del_cached_port(struct vtep_ctl_context *ctx, struct vtep_ctl_port *port) char *cache_name = xasprintf("%s+%s", port->ps->name, port->port_cfg->name); list_remove(&port->ports_node); - shash_find_and_delete(&ctx->ports, port->port_cfg->name); + shash_find_and_delete(&ctx->ports, cache_name); vteprec_physical_port_delete(port->port_cfg); free(cache_name); free(port); From 2541d75983cb6a48f0303ab96ec2a1be1b0ccbe7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 11 Jun 2015 17:28:37 -0700 Subject: [PATCH 145/146] rculist: Remove postponed poisoning. Postponed 'next' member poisoning was based on the faulty assumption that postponed functions would be called in the order they were postponed. This assumption holds only for the functions postponed by any single thread. When functions are postponed by different threads, there are no guarantees of the order in which the functions may be called, or timing between those calls after the next grace period has passed. Given this, the postponed poisoning could have executed after postponed destruction of the object containing the rculist element. This bug was revealed after the memory leaks on rule deletion were recently fixed. This patch removes the postponed 'next' member poisoning and adds documentation describing the ordering limitations in OVS RCU. Alex Wang dug out the root cause of the resulting crashes, thanks! 
Signed-off-by: Jarno Rajahalme Acked-by: Alex Wang --- lib/automake.mk | 1 - lib/classifier.c | 8 ++++---- lib/ovs-rcu.c | 13 +++++++++++++ lib/ovs-rcu.h | 15 ++++++++++++++- lib/rculist.c | 27 --------------------------- lib/rculist.h | 36 +++++++++++++++++------------------- 6 files changed, 48 insertions(+), 52 deletions(-) delete mode 100644 lib/rculist.c diff --git a/lib/automake.mk b/lib/automake.mk index 7a34c1a0b..f0821152d 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -197,7 +197,6 @@ lib_libopenvswitch_la_SOURCES = \ lib/random.h \ lib/rconn.c \ lib/rconn.h \ - lib/rculist.c \ lib/rculist.h \ lib/reconnect.c \ lib/reconnect.h \ diff --git a/lib/classifier.c b/lib/classifier.c index 5f92f0514..66a2655cb 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -241,14 +241,14 @@ cls_rule_move(struct cls_rule *dst, struct cls_rule *src) * ('rule' must not currently be in a classifier.) */ void cls_rule_destroy(struct cls_rule *rule) + OVS_NO_THREAD_SAFETY_ANALYSIS { ovs_assert(!rule->cls_match); /* Must not be in a classifier. */ - /* Check that the rule has been properly removed from the classifier and - * that the destruction only happens after the RCU grace period, or that - * the rule was never inserted to the classifier in the first place. */ - ovs_assert(rculist_next_protected(&rule->node) == RCULIST_POISON + /* Check that the rule has been properly removed from the classifier. */ + ovs_assert(rule->node.prev == RCULIST_POISON || rculist_is_empty(&rule->node)); + rculist_poison__(&rule->node); /* Poisons also the next pointer. */ minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match)); } diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index e0634cfab..b8f8bc481 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -212,6 +212,19 @@ ovsrcu_synchronize(void) /* Registers 'function' to be called, passing 'aux' as argument, after the * next grace period. 
* + * The call is guaranteed to happen after the next time all participating + * threads have quiesced at least once, but there is no guarantee that all + * registered functions are called as early as possible, or that the functions + * registered by different threads would be called in the order the + * registrations took place. In particular, even if two threads provably + * register a function each in a specific order, the functions may still be + * called in the opposite order, depending on the timing of when the threads + * call ovsrcu_quiesce(), how many functions they postpone, and when the + * ovs-rcu thread happens to grab the functions to be called. + * + * All functions registered by a single thread are guaranteed to execute in the + * registering order, however. + * * This function is more conveniently called through the ovsrcu_postpone() * macro, which provides a type-safe way to allow 'function''s parameter to be * any pointer type. */ diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index 1d79976a3..c1e3d6061 100644 --- a/lib/ovs-rcu.h +++ b/lib/ovs-rcu.h @@ -60,12 +60,25 @@ * * When a quiescient state has occurred in every thread, we say that a "grace * period" has occurred. Following a grace period, all of the callbacks - * postponed before the start of the grace period may be invoked. OVS takes + * postponed before the start of the grace period MAY be invoked. OVS takes * care of this automatically through the RCU mechanism: while a process still * has only a single thread, it invokes the postponed callbacks directly from * ovsrcu_quiesce() and ovsrcu_quiesce_start(); after additional threads have * been created, it creates an extra helper thread to invoke callbacks. 
* + * Please note that while a postponed function call is guaranteed to happen + * after the next time all participating threads have quiesced at least once, + * there is no guarantee that all postponed functions are called as early as + * possible, or that the functions postponed by different threads would be + * called in the order the registrations took place. In particular, even if + * two threads provably postpone a function each in a specific order, the + * postponed functions may still be called in the opposite order, depending on + * the timing of when the threads call ovsrcu_quiesce(), how many functions + * they postpone, and when the ovs-rcu thread happens to grab the functions to + * be called. + * + * All functions postponed by a single thread are guaranteed to execute in the + * order they were postponed, however. * * Use * --- diff --git a/lib/rculist.c b/lib/rculist.c deleted file mode 100644 index 61a03d05b..000000000 --- a/lib/rculist.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include "rculist.h" - -/* Initializes 'list' with pointers that will (probably) cause segfaults if - * dereferenced and, better yet, show up clearly in a debugger. 
*/ -void -rculist_poison__(struct rculist *list) - OVS_NO_THREAD_SAFETY_ANALYSIS -{ - list->prev = RCULIST_POISON; - ovsrcu_set_hidden(&list->next, RCULIST_POISON); -} diff --git a/lib/rculist.h b/lib/rculist.h index f3c1475de..7ba20e5f2 100644 --- a/lib/rculist.h +++ b/lib/rculist.h @@ -38,10 +38,7 @@ * - rculist_front() returns a const pointer to accommodate for an RCU reader. * - rculist_splice_hidden(): Spliced elements may not have been visible to * RCU readers before the operation. - * - rculist_poison(): Immediately poisons the 'prev' pointer, and schedules - * ovsrcu_postpone() to poison the 'next' pointer. This issues a memory - * write operation to the list element, hopefully crashing the program if - * the list node was freed or re-used too early. + * - rculist_poison(): Only poisons the 'prev' pointer. * * The following functions are variations of the struct ovs_list functions with * similar names, but are now restricted to the writer use: @@ -134,8 +131,6 @@ rculist_init(struct rculist *list) #define RCULIST_POISON (struct rculist *)(UINTPTR_MAX / 0xf * 0xc) -void rculist_poison__(struct rculist *list); - /* Initializes 'list' with pointers that will (probably) cause segfaults if * dereferenced and, better yet, show up clearly in a debugger. */ static inline void @@ -143,7 +138,19 @@ rculist_poison(struct rculist *list) OVS_NO_THREAD_SAFETY_ANALYSIS { list->prev = RCULIST_POISON; - ovsrcu_postpone(rculist_poison__, list); +} + +/* Initializes 'list' with pointers that will (probably) cause segfaults if + * dereferenced and, better yet, show up clearly in a debugger. + * + * This variant poisons also the next pointer, so this may not be called if + * this list element is still visible to RCU readers. */ +static inline void +rculist_poison__(struct rculist *list) + OVS_NO_THREAD_SAFETY_ANALYSIS +{ + rculist_poison(list); + ovsrcu_set_hidden(&list->next, RCULIST_POISON); } /* rculist insertion. 
*/ @@ -217,10 +224,7 @@ rculist_replace(struct rculist *element, struct rculist *position) position_next->prev = element; element->prev = position->prev; ovsrcu_set(&element->prev->next, element); - -#ifndef NDEBUG - rculist_poison(position); /* XXX: Some overhead due to ovsrcu_postpone() */ -#endif + rculist_poison(position); } /* Initializes 'dst' with the contents of 'src', compensating for moving it @@ -244,10 +248,7 @@ rculist_move(struct rculist *dst, struct rculist *src) } else { rculist_init(dst); } - -#ifndef NDEBUG - rculist_poison(src); /* XXX: Some overhead due to ovsrcu_postpone() */ -#endif + rculist_poison(src); } /* Removes 'elem' from its list and returns the element that followed it. @@ -268,10 +269,7 @@ rculist_remove(struct rculist *elem) elem_next->prev = elem->prev; ovsrcu_set(&elem->prev->next, elem_next); - -#ifndef NDEBUG - rculist_poison(elem); /* XXX: Some overhead due to ovsrcu_postpone() */ -#endif + rculist_poison(elem); return elem_next; } From be9d0de7a40459507b0620bc7bc352f7b6b0942d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 12 Jun 2015 10:58:27 -0700 Subject: [PATCH 146/146] ovs-rcu: Comment fixes. A comment referred to a "Usage" section but the section was named "Use". This fixes the problem (also a grammar error). Signed-off-by: Ben Pfaff Acked-by: Alex Wang --- lib/ovs-rcu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index c1e3d6061..5cd41528c 100644 --- a/lib/ovs-rcu.h +++ b/lib/ovs-rcu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Nicira, Inc. + * Copyright (c) 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,8 +80,8 @@ * All functions postponed by a single thread are guaranteed to execute in the * order they were postponed, however. * - * Use - * --- + * Usage + * ----- * * Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. 
the * following declares an RCU-protected "struct flow *" named flowp: @@ -204,7 +204,7 @@ static inline void ovsrcu_set__(struct ovsrcu_pointer *pointer, #define ovsrcu_init(VAR, VALUE) atomic_init(&(VAR)->p, VALUE) /* Calls FUNCTION passing ARG as its pointer-type argument following the next - * grace period. See "Usage" above for example. */ + * grace period. See "Usage" above for an example. */ void ovsrcu_postpone__(void (*function)(void *aux), void *aux); #define ovsrcu_postpone(FUNCTION, ARG) \ ((void) sizeof((FUNCTION)(ARG), 1), \