openflow: Better abstract handling of packet-in messages.
Packet-in messages have been a bit of a mess. First, their abstraction
in the form of struct ofputil_packet_in has some fields that are used
in a clear way for incoming and outgoing packet-ins, and others
(packet_len, total_len, buffer_id) have confusing meanings or
usage pattern depending on their direction.
Second, it's very confusing how a packet-in has both a reason (OFPR_*)
and a miss type (OFPROTO_PACKET_IN_*) and how those add up to the
actual reason that is used "on the wire" for each OpenFlow version (and
even whether the packet-in is sent at all!).
Finally, there are all kinds of low-level details randomly scattered between
connmgr, ofproto-dpif-xlate, and ofp-util.
This commit attempts to clear up some of the confusion. It simplifies
the struct ofputil_packet_in abstraction by removing the members that
didn't have a clear and consistent meaning between incoming and outgoing
packet-ins. It gets rid of OFPROTO_PACKET_IN_*, instead adding a couple
of nonstandard OFPR_* reasons that add up to what OFPROTO_PACKET_IN_*
was meant to say (in what I hope is a clearer way). And it consolidates
the tricky parts into ofp-util, where I hope it will be easier to
understand all in one place.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-20 09:57:16 -08:00
|
|
|
|
/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
|
2013-06-11 13:32:30 -07:00
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License. */
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
|
|
#include "ofproto/ofproto-dpif-xlate.h"
|
|
|
|
|
|
2013-08-02 12:43:03 -07:00
|
|
|
|
#include <errno.h>
|
2014-11-11 11:53:47 -08:00
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
#include <net/if.h>
|
|
|
|
|
#include <sys/socket.h>
|
|
|
|
|
#include <netinet/in.h>
|
2013-08-02 12:43:03 -07:00
|
|
|
|
|
2013-06-17 17:56:54 -07:00
|
|
|
|
#include "bfd.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "bitmap.h"
|
|
|
|
|
#include "bond.h"
|
|
|
|
|
#include "bundle.h"
|
|
|
|
|
#include "byte-order.h"
|
2013-06-17 17:56:54 -07:00
|
|
|
|
#include "cfm.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "connmgr.h"
|
|
|
|
|
#include "coverage.h"
|
2015-02-25 12:01:53 -08:00
|
|
|
|
#include "dp-packet.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "dpif.h"
|
2013-06-22 10:48:42 -07:00
|
|
|
|
#include "in-band.h"
|
2013-06-17 17:56:54 -07:00
|
|
|
|
#include "lacp.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "learn.h"
|
|
|
|
|
#include "mac-learning.h"
|
2014-06-18 22:14:31 -03:00
|
|
|
|
#include "mcast-snooping.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "multipath.h"
|
|
|
|
|
#include "netdev-vport.h"
|
|
|
|
|
#include "netlink.h"
|
|
|
|
|
#include "nx-match.h"
|
|
|
|
|
#include "odp-execute.h"
|
|
|
|
|
#include "ofproto/ofproto-dpif-ipfix.h"
|
2013-06-20 13:00:27 -07:00
|
|
|
|
#include "ofproto/ofproto-dpif-mirror.h"
|
2013-12-20 14:53:52 -08:00
|
|
|
|
#include "ofproto/ofproto-dpif-monitor.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "ofproto/ofproto-dpif-sflow.h"
|
|
|
|
|
#include "ofproto/ofproto-dpif.h"
|
2013-09-09 13:05:52 -07:00
|
|
|
|
#include "ofproto/ofproto-provider.h"
|
2016-04-14 15:20:19 -07:00
|
|
|
|
#include "openvswitch/dynamic-string.h"
|
|
|
|
|
#include "openvswitch/meta-flow.h"
|
|
|
|
|
#include "openvswitch/list.h"
|
|
|
|
|
#include "openvswitch/ofp-actions.h"
|
|
|
|
|
#include "openvswitch/vlog.h"
|
|
|
|
|
#include "ovs-lldp.h"
|
2014-11-11 11:53:47 -08:00
|
|
|
|
#include "ovs-router.h"
|
2016-04-14 15:20:19 -07:00
|
|
|
|
#include "packets.h"
|
|
|
|
|
#include "tnl-neigh-cache.h"
|
2014-11-11 11:53:47 -08:00
|
|
|
|
#include "tnl-ports.h"
|
2013-06-11 13:32:30 -07:00
|
|
|
|
#include "tunnel.h"
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
COVERAGE_DEFINE(xlate_actions);
|
2013-11-02 08:43:14 -07:00
|
|
|
|
COVERAGE_DEFINE(xlate_actions_oversize);
|
2014-09-09 15:06:52 -07:00
|
|
|
|
COVERAGE_DEFINE(xlate_actions_too_many_output);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
|
|
|
|
|
|
2013-06-13 18:10:00 -07:00
|
|
|
|
/* Maximum depth of flow table recursion (due to resubmit actions) in a
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
* flow translation.
|
|
|
|
|
*
|
|
|
|
|
* The goal of limiting the depth of resubmits is to ensure that flow
|
|
|
|
|
* translation eventually terminates. Only resubmits to the same table or an
|
|
|
|
|
* earlier table count against the maximum depth. This is because resubmits to
|
|
|
|
|
* strictly monotonically increasing table IDs will eventually terminate, since
|
|
|
|
|
* any OpenFlow switch has a finite number of tables. OpenFlow tables are most
|
|
|
|
|
* commonly traversed in numerically increasing order, so this limit has little
|
|
|
|
|
* effect on conventionally designed OpenFlow pipelines.
|
|
|
|
|
*
|
|
|
|
|
* Outputs to patch ports and to groups also count against the depth limit. */
|
|
|
|
|
#define MAX_DEPTH 64
|
2013-06-13 18:10:00 -07:00
|
|
|
|
|
2013-10-04 08:47:16 -07:00
|
|
|
|
/* Maximum number of resubmit actions in a flow translation, whether they are
|
|
|
|
|
* recursive or not. */
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
#define MAX_RESUBMITS (MAX_DEPTH * MAX_DEPTH)
|
2013-10-04 08:47:16 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
/* Per-bridge translation state.  Each xbridge is a node in the global
 * 'xbridges' map, keyed by its 'ofproto'.  It carries a list of the xbundles
 * it owns and a map of its xports indexed by OpenFlow port number, plus
 * handles to the bridge's optional features; per the member comments, several
 * of those handles are null when the feature is absent or disabled. */
struct xbridge {
|
|
|
|
|
struct hmap_node hmap_node; /* Node in global 'xbridges' map. */
|
|
|
|
|
struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
|
|
|
|
|
|
2014-12-15 14:10:38 +01:00
|
|
|
|
struct ovs_list xbundles; /* List of the xbundles owned by this bridge. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct hmap xports; /* Map of xports, indexed by ofp_port. */
|
|
|
|
|
|
|
|
|
|
char *name; /* Name used in log messages. */
|
2013-07-06 11:46:48 -07:00
|
|
|
|
struct dpif *dpif; /* Datapath interface. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct mac_learning *ml; /* MAC learning handle. */
|
2014-06-18 22:14:31 -03:00
|
|
|
|
struct mcast_snooping *ms; /* Multicast snooping handle. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct mbridge *mbridge; /* Mirroring. */
|
|
|
|
|
|
struct dpif_sflow *sflow; /* sFlow handle, or null. */
|
|
|
|
|
|
struct dpif_ipfix *ipfix; /* IPFIX handle, or null. */
|
2013-10-30 16:29:58 -07:00
|
|
|
|
struct netflow *netflow; /* NetFlow handle, or null. */
|
2013-07-06 09:31:35 -07:00
|
|
|
|
struct stp *stp; /* STP handle, or null if disabled. */
|
2014-08-22 09:01:34 -07:00
|
|
|
|
struct rstp *rstp; /* RSTP handle, or null if disabled. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool has_in_band; /* Bridge has in-band control? */
|
|
|
|
|
|
bool forward_bpdu; /* Bridge forwards STP BPDUs? */
|
2013-12-30 14:49:25 -08:00
|
|
|
|
|
|
2015-02-24 16:40:42 -08:00
|
|
|
|
/* Datapath feature support. */
|
|
|
|
|
|
struct dpif_backer_support support;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Per-bundle translation state.  Each xbundle is a node in the global
 * 'xbundles' map, keyed by its 'ofbundle'.  It belongs to one parent xbridge
 * and holds the list of xports that make up the bundle, together with the
 * bundle's bonding, LACP, and VLAN configuration. */
struct xbundle {
|
|
|
|
|
struct hmap_node hmap_node; /* In global 'xbundles' map. */
|
|
|
|
|
struct ofbundle *ofbundle; /* Key in global 'xbundles' map. */
|
|
|
|
|
|
2014-12-15 14:10:38 +01:00
|
|
|
|
struct ovs_list list_node; /* Node in a list of the parent xbridge; presumably its 'xbundles' list -- TODO confirm. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbridge *xbridge; /* Parent xbridge. */
|
|
|
|
|
|
|
2014-12-15 14:10:38 +01:00
|
|
|
|
struct ovs_list xports; /* Contains "struct xport"s. */
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char *name; /* Name used in log messages. */
|
|
|
|
|
|
struct bond *bond; /* Nonnull iff more than one port. */
|
|
|
|
|
|
struct lacp *lacp; /* LACP handle or null. */
|
|
|
|
|
|
|
|
|
|
|
enum port_vlan_mode vlan_mode; /* VLAN mode. */
|
|
|
|
|
|
int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */
|
|
|
|
|
|
unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1.
|
|
|
|
|
|
* NULL if all VLANs are trunked. */
|
|
|
|
|
|
bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */
|
|
|
|
|
|
bool floodable; /* No port has OFPUTIL_PC_NO_FLOOD set? */
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Per-port translation state.  Each xport is a node in the global 'xports'
 * map, keyed by its 'ofport', and also a node in its parent xbridge's
 * 'xports' map, keyed by 'ofp_port'.  A port may additionally belong to an
 * xbundle and may have a patch-port peer; both pointers are null otherwise. */
struct xport {
|
|
|
|
|
struct hmap_node hmap_node; /* Node in global 'xports' map. */
|
|
|
|
|
struct ofport_dpif *ofport; /* Key in global 'xports' map. */
|
|
|
|
|
|
|
|
|
|
|
struct hmap_node ofp_node; /* Node in parent xbridge 'xports' map. */
|
|
|
|
|
|
ofp_port_t ofp_port; /* Key in parent xbridge 'xports' map. */
|
|
|
|
|
|
|
|
|
|
|
odp_port_t odp_port; /* Datapath port number or ODPP_NONE. */
|
|
|
|
|
|
|
2014-12-15 14:10:38 +01:00
|
|
|
|
struct ovs_list bundle_node; /* In parent xbundle (if it exists). */
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbundle *xbundle; /* Parent xbundle or null. */
|
|
|
|
|
|
|
|
|
|
|
struct netdev *netdev; /* 'ofport''s netdev. */
|
|
|
|
|
|
|
|
|
|
|
struct xbridge *xbridge; /* Parent bridge. */
|
|
|
|
|
|
struct xport *peer; /* Patch port peer or null. */
|
|
|
|
|
|
|
|
|
|
|
enum ofputil_port_config config; /* OpenFlow port configuration. */
|
2013-10-30 18:17:18 +09:00
|
|
|
|
enum ofputil_port_state state; /* OpenFlow port state. */
|
2013-09-04 15:21:15 -07:00
|
|
|
|
int stp_port_no; /* STP port number or -1 if not in use. */
|
2014-09-09 11:11:18 -07:00
|
|
|
|
struct rstp_port *rstp_port; /* RSTP port or null. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2013-07-06 10:25:06 -07:00
|
|
|
|
struct hmap skb_priorities; /* Map of 'skb_priority_to_dscp's. */
|
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
bool may_enable; /* May be enabled in bonds. */
|
|
|
|
|
|
bool is_tunnel; /* Is a tunnel port. */
|
|
|
|
|
|
|
|
|
|
|
struct cfm *cfm; /* CFM handle or null. */
|
|
|
|
|
|
struct bfd *bfd; /* BFD handle or null. */
|
2015-02-20 14:17:10 -05:00
|
|
|
|
struct lldp *lldp; /* LLDP handle or null. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
};
|
|
|
|
|
|
2013-06-12 12:51:52 -07:00
|
|
|
|
struct xlate_ctx {
|
|
|
|
|
struct xlate_in *xin;
|
|
|
|
|
struct xlate_out *xout;
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct xbridge *xbridge;
|
2013-06-12 12:51:52 -07:00
|
|
|
|
|
2015-06-11 15:53:43 -07:00
|
|
|
|
/* Flow tables version at the beginning of the translation. */
|
2015-06-12 16:12:56 -07:00
|
|
|
|
cls_version_t tables_version;
|
2015-06-11 15:53:43 -07:00
|
|
|
|
|
2013-06-12 12:51:52 -07:00
|
|
|
|
/* Flow at the last commit. */
|
|
|
|
|
struct flow base_flow;
|
|
|
|
|
|
|
|
|
|
/* Tunnel IP destination address as received. This is stored separately
|
|
|
|
|
* as the base_flow.tunnel is cleared on init to reflect the datapath
|
|
|
|
|
* behavior. Used to make sure not to send tunneled output to ourselves,
|
|
|
|
|
* which might lead to an infinite loop. This could happen easily
|
|
|
|
|
* if a tunnel is marked as 'ip_remote=flow', and the flow does not
|
|
|
|
|
* actually set the tun_dst field. */
|
2015-11-25 11:31:09 -02:00
|
|
|
|
struct in6_addr orig_tunnel_ipv6_dst;
|
2013-06-12 12:51:52 -07:00
|
|
|
|
|
|
|
|
|
/* Stack for the push and pop actions. Each stack element is of type
|
|
|
|
|
* "union mf_subvalue". */
|
|
|
|
|
struct ofpbuf stack;
|
|
|
|
|
|
|
|
|
|
/* The rule that we are currently translating, or NULL. */
|
|
|
|
|
struct rule_dpif *rule;
|
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
/* Flow translation populates this with wildcards relevant in translation.
|
|
|
|
|
* When 'xin->wc' is nonnull, this is the same pointer. When 'xin->wc' is
|
2016-04-22 17:45:03 -07:00
|
|
|
|
* null, this is a pointer to a temporary buffer. */
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc;
|
|
|
|
|
|
2015-07-31 13:34:16 -07:00
|
|
|
|
/* Output buffer for datapath actions. When 'xin->odp_actions' is nonnull,
|
|
|
|
|
* this is the same pointer. When 'xin->odp_actions' is null, this points
|
|
|
|
|
* to a scratch ofpbuf. This allows code to add actions to
|
|
|
|
|
* 'ctx->odp_actions' without worrying about whether the caller really
|
|
|
|
|
* wants actions. */
|
|
|
|
|
struct ofpbuf *odp_actions;
|
|
|
|
|
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
/* Statistics maintained by xlate_table_action().
|
|
|
|
|
*
|
|
|
|
|
* 'indentation' is the nesting level for resubmits. It is used to indent
|
|
|
|
|
* the output of resubmit_hook (e.g. for the "ofproto/trace" feature).
|
|
|
|
|
*
|
|
|
|
|
* The other statistics limit the amount of work that a single flow
|
|
|
|
|
* translation can perform. The goal of the first of these, 'depth', is
|
|
|
|
|
* primarily to prevent translation from performing an infinite amount of
|
|
|
|
|
* work. It counts the current depth of nested "resubmit"s (and a few
|
|
|
|
|
* other activities); when a resubmit returns, it decreases. Resubmits to
|
|
|
|
|
* tables in strictly monotonically increasing order don't contribute to
|
|
|
|
|
* 'depth' because they cannot cause a flow translation to take an infinite
|
|
|
|
|
* amount of time (because the number of tables is finite). Translation
|
|
|
|
|
* aborts when 'depth' exceeds MAX_DEPTH.
|
|
|
|
|
*
|
|
|
|
|
* 'resubmits', on the other hand, prevents flow translation from
|
|
|
|
|
* performing an extraordinarily large while still finite amount of work.
|
|
|
|
|
* It counts the total number of resubmits (and a few other activities)
|
|
|
|
|
* that have been executed. Returning from a resubmit does not affect this
|
|
|
|
|
* counter. Thus, this limits the amount of work that a particular
|
|
|
|
|
* translation can perform. Translation aborts when 'resubmits' exceeds
|
|
|
|
|
* MAX_RESUBMITS (which is much larger than MAX_DEPTH).
|
|
|
|
|
*/
|
|
|
|
|
int indentation; /* Indentation level for resubmit_hook. */
|
|
|
|
|
int depth; /* Current resubmit nesting depth. */
|
2013-10-04 08:47:16 -07:00
|
|
|
|
int resubmits; /* Total number of resubmits. */
|
2014-02-21 16:27:00 -08:00
|
|
|
|
bool in_group; /* Currently translating ofgroup, if true. */
|
2015-02-03 22:24:18 -08:00
|
|
|
|
bool in_action_set; /* Currently translating action_set, if true. */
|
2013-10-04 08:47:16 -07:00
|
|
|
|
|
2013-06-12 12:51:52 -07:00
|
|
|
|
uint8_t table_id; /* OpenFlow table ID where flow was found. */
|
2015-03-13 13:27:19 -07:00
|
|
|
|
ovs_be64 rule_cookie; /* Cookie of the rule being translated. */
|
|
|
|
|
uint32_t orig_skb_priority; /* Priority when packet arrived. */
|
2013-06-12 12:51:52 -07:00
|
|
|
|
uint32_t sflow_n_outputs; /* Number of output ports. */
|
2013-06-19 16:58:44 -07:00
|
|
|
|
odp_port_t sflow_odp_port; /* Output port for composing sFlow action. */
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
|
2013-06-12 12:51:52 -07:00
|
|
|
|
bool exit; /* No further actions should be processed. */
|
2015-07-23 17:08:14 -07:00
|
|
|
|
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
|
2013-10-11 13:23:29 +09:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Freezing Translation
|
|
|
|
|
* ====================
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* At some point during translation, the code may recognize the need to halt
|
|
|
|
|
* and checkpoint the translation in a way that it can be restarted again
|
|
|
|
|
* later. We call the checkpointing process "freezing" and the restarting
|
|
|
|
|
* process "thawing".
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* The use cases for freezing are:
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* - "Recirculation", where the translation process discovers that it
|
|
|
|
|
* doesn't have enough information to complete translation without
|
|
|
|
|
* actually executing the actions that have already been translated,
|
|
|
|
|
* which provides the additionally needed information. In these
|
|
|
|
|
* situations, translation freezes translation and assigns the frozen
|
|
|
|
|
* data a unique "recirculation ID", which it associates with the data
|
|
|
|
|
* in a table in userspace (see ofproto-dpif-rid.h). It also adds an
|
|
|
|
|
* OVS_ACTION_ATTR_RECIRC action specifying that ID to the datapath
|
|
|
|
|
* actions. When a packet hits that action, the datapath looks its
|
|
|
|
|
* flow up again using the ID. If there's a miss, it comes back to
|
|
|
|
|
* userspace, which finds the recirculation table entry for the ID,
|
|
|
|
|
* thaws the associated frozen data, and continues translation from
|
|
|
|
|
* that point given the additional information that is now known.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* The archetypal example is MPLS. As MPLS is implemented in
|
|
|
|
|
* OpenFlow, the protocol that follows the last MPLS label becomes
|
|
|
|
|
* known only when that label is popped by an OpenFlow action. That
|
|
|
|
|
* means that Open vSwitch can't extract the headers beyond the MPLS
|
|
|
|
|
* labels until the pop action is executed. Thus, at that point
|
|
|
|
|
* translation uses the recirculation process to extract the headers
|
|
|
|
|
* beyond the MPLS labels.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for
|
|
|
|
|
* output to bonds. OVS pre-populates all the datapath flows for bond
|
|
|
|
|
* output in the datapath, though, which means that the elaborate
|
|
|
|
|
* process of coming back to userspace for a second round of
|
|
|
|
|
* translation isn't needed, and so bonds don't follow the above
|
|
|
|
|
* process.)
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
* - "Continuation". A continuation is a way for an OpenFlow controller
|
|
|
|
|
* to interpose on a packet's traversal of the OpenFlow tables. When
|
|
|
|
|
* the translation process encounters a "controller" action with the
|
|
|
|
|
* "pause" flag, it freezes translation, serializes the frozen data,
|
|
|
|
|
* and sends it to an OpenFlow controller. The controller then
|
|
|
|
|
* examines and possibly modifies the frozen data and eventually sends
|
|
|
|
|
* it back to the switch, which thaws it and continues translation.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* The main problem of freezing translation is preserving state, so that
|
|
|
|
|
* when the translation is thawed later it resumes from where it left off,
|
|
|
|
|
* without disruption. In particular, actions must be preserved as follows:
|
|
|
|
|
*
|
|
|
|
|
* - If we're freezing because an action needed more information, the
|
|
|
|
|
* action that prompted it.
|
|
|
|
|
*
|
|
|
|
|
* - Any actions remaining to be translated within the current flow.
|
|
|
|
|
*
|
|
|
|
|
* - If translation was frozen within a NXAST_RESUBMIT, then any actions
|
|
|
|
|
* following the resubmit action. Resubmit actions can be nested, so
|
|
|
|
|
* this has to go all the way up the control stack.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* - The OpenFlow 1.1+ action set.
|
|
|
|
|
*
|
|
|
|
|
* State that actions and flow table lookups can depend on, such as the
|
|
|
|
|
* following, must also be preserved:
|
|
|
|
|
*
|
|
|
|
|
* - Metadata fields (input port, registers, OF1.1+ metadata, ...).
|
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* - The table ID and cookie of the flow being translated at each level
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* of the control stack, because these can become visible through
|
|
|
|
|
* OFPAT_CONTROLLER actions (and other ways).
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* Translation allows for the control of this state preservation via these
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* members. When a need to freeze translation is identified, the
|
|
|
|
|
* translation process:
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* 1. Sets 'freezing' to true.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* 2. Sets 'exit' to true to tell later steps that we're exiting from the
|
|
|
|
|
* translation process.
|
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points
|
|
|
|
|
* frozen_actions.header to the action to make it easy to find it later.
|
|
|
|
|
* This action holds the current table ID and cookie so that they can be
|
|
|
|
|
* restored during a post-recirculation upcall translation.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* 4. Adds the action that prompted recirculation and any actions following
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* it within the same flow to 'frozen_actions', so that they can be
|
2016-01-28 17:11:19 -08:00
|
|
|
|
* executed during a post-recirculation upcall translation.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
|
|
|
|
* 5. Returns.
|
|
|
|
|
*
|
|
|
|
|
* 6. The action that prompted recirculation might be nested in a stack of
|
|
|
|
|
* nested "resubmit"s that have actions remaining. Each of these notices
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* that we're exiting and freezing and responds by adding more
|
|
|
|
|
* OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary,
|
|
|
|
|
* followed by any actions that were yet unprocessed.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* If we're freezing because of recirculation, the caller generates a
|
|
|
|
|
* recirculation ID and associates all the state produced by this process
|
|
|
|
|
* with it. For post-recirculation upcall translation, the caller passes it
|
|
|
|
|
* back in for the new translation to execute. The process yielded a set of
|
|
|
|
|
* ofpacts that can be translated directly, so it is not much of a special
|
|
|
|
|
* case at that point.
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*/
|
2016-02-16 10:51:58 -08:00
|
|
|
|
bool freezing;
|
|
|
|
|
struct ofpbuf frozen_actions;
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
const struct ofpact_controller *pause;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
/* True if a packet was but is no longer MPLS (due to an MPLS pop action).
|
|
|
|
|
* This is a trigger for recirculation in cases where translating an action
|
|
|
|
|
* or looking up a flow requires access to the fields of the packet after
|
|
|
|
|
* the MPLS label stack that was originally present. */
|
|
|
|
|
bool was_mpls;
|
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
/* True if conntrack has been performed on this packet during processing
|
|
|
|
|
* on the current bridge. This is used to determine whether conntrack
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* state from the datapath should be honored after thawing. */
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
bool conntracked;
|
|
|
|
|
|
2015-11-24 15:47:56 -08:00
|
|
|
|
/* Pointer to an embedded NAT action in a conntrack action, or NULL. */
|
|
|
|
|
struct ofpact_nat *ct_nat_action;
|
|
|
|
|
|
2013-10-11 13:23:29 +09:00
|
|
|
|
/* OpenFlow 1.1+ action set.
|
|
|
|
|
*
|
|
|
|
|
* 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
|
|
|
|
|
* When translation is otherwise complete, ofpacts_execute_action_set()
|
|
|
|
|
* converts it to a set of "struct ofpact"s that can be translated into
|
2015-03-11 18:01:51 -07:00
|
|
|
|
* datapath actions. */
|
2014-11-03 14:24:01 -08:00
|
|
|
|
bool action_set_has_group; /* Action set contains OFPACT_GROUP? */
|
2013-10-11 13:23:29 +09:00
|
|
|
|
struct ofpbuf action_set; /* Action set. */
|
2015-11-25 15:19:37 -08:00
|
|
|
|
|
|
|
|
|
enum xlate_error error; /* Translation failed. */
|
2013-06-12 12:51:52 -07:00
|
|
|
|
};
|
|
|
|
|
|
2015-11-25 15:19:37 -08:00
|
|
|
|
const char *xlate_strerror(enum xlate_error error)
|
|
|
|
|
{
|
|
|
|
|
switch (error) {
|
|
|
|
|
case XLATE_OK:
|
|
|
|
|
return "OK";
|
|
|
|
|
case XLATE_BRIDGE_NOT_FOUND:
|
|
|
|
|
return "Bridge not found";
|
|
|
|
|
case XLATE_RECURSION_TOO_DEEP:
|
|
|
|
|
return "Recursion too deep";
|
|
|
|
|
case XLATE_TOO_MANY_RESUBMITS:
|
|
|
|
|
return "Too many resubmits";
|
|
|
|
|
case XLATE_STACK_TOO_DEEP:
|
|
|
|
|
return "Stack too deep";
|
|
|
|
|
case XLATE_NO_RECIRCULATION_CONTEXT:
|
|
|
|
|
return "No recirculation context";
|
|
|
|
|
case XLATE_RECIRCULATION_CONFLICT:
|
|
|
|
|
return "Recirculation conflict";
|
|
|
|
|
case XLATE_TOO_MANY_MPLS_LABELS:
|
|
|
|
|
return "Too many MPLS labels";
|
|
|
|
|
}
|
|
|
|
|
return "Unknown error";
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-11 18:01:51 -07:00
|
|
|
|
static void xlate_action_set(struct xlate_ctx *ctx);
|
2015-09-09 19:00:17 -07:00
|
|
|
|
static void xlate_commit_actions(struct xlate_ctx *ctx);
|
2015-03-11 18:01:51 -07:00
|
|
|
|
|
2015-03-26 11:18:17 -07:00
|
|
|
|
static void
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx_trigger_freeze(struct xlate_ctx *ctx)
|
2015-03-26 11:18:17 -07:00
|
|
|
|
{
|
|
|
|
|
ctx->exit = true;
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx->freezing = true;
|
2015-03-26 11:18:17 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx_first_frozen_action(const struct xlate_ctx *ctx)
|
2015-03-26 11:18:17 -07:00
|
|
|
|
{
|
2016-02-16 10:51:58 -08:00
|
|
|
|
return !ctx->frozen_actions.size;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-01-28 16:35:11 -08:00
|
|
|
|
static void
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx_cancel_freeze(struct xlate_ctx *ctx)
|
2016-01-28 16:35:11 -08:00
|
|
|
|
{
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (ctx->freezing) {
|
|
|
|
|
ctx->freezing = false;
|
|
|
|
|
ofpbuf_clear(&ctx->frozen_actions);
|
|
|
|
|
ctx->frozen_actions.header = NULL;
|
2016-01-28 16:35:11 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
static void finish_freezing(struct xlate_ctx *ctx);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* A controller may use OFPP_NONE as the ingress port to indicate that
|
|
|
|
|
* it did not arrive on a "real" port. 'ofpp_none_bundle' exists for
|
|
|
|
|
* when an input bundle is needed for validation (e.g., mirroring or
|
|
|
|
|
* OFPP_NORMAL processing). It is not connected to an 'ofproto' or have
|
2014-03-19 17:34:55 -07:00
|
|
|
|
* any 'port' structs, so care must be taken when dealing with it. */
|
|
|
|
|
static struct xbundle ofpp_none_bundle = {
|
|
|
|
|
.name = "OFPP_NONE",
|
|
|
|
|
.vlan_mode = PORT_VLAN_TRUNK
|
|
|
|
|
};
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-07-06 10:25:06 -07:00
|
|
|
|
/* Node in 'xport''s 'skb_priorities' map. Used to maintain a map from
|
|
|
|
|
* 'priority' (the datapath's term for QoS queue) to the dscp bits which all
|
|
|
|
|
* traffic egressing the 'ofport' with that priority should be marked with. */
|
|
|
|
|
struct skb_priority_to_dscp {
|
|
|
|
|
struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */
|
|
|
|
|
uint32_t skb_priority; /* Priority of this queue (see struct flow). */
|
|
|
|
|
|
|
|
|
|
uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */
|
|
|
|
|
};
|
|
|
|
|
|
2014-04-10 16:00:28 +12:00
|
|
|
|
/* Kind of an xlate_cache entry; stored in the 'type' member of
 * 'struct xc_entry'.  Values are assigned implicitly, starting from 0, in
 * the order listed. */
enum xc_type {
    XC_RULE,
    XC_BOND,
    XC_NETDEV,
    XC_NETFLOW,
    XC_MIRROR,
    XC_LEARN,
    XC_NORMAL,
    XC_FIN_TIMEOUT,
    XC_GROUP,
    XC_TNL_NEIGH,
};
|
|
|
|
|
|
|
|
|
|
/* xlate_cache entries hold enough information to perform the side effects of
|
|
|
|
|
* xlate_actions() for a rule, without needing to perform rule translation
|
|
|
|
|
* from scratch. The primary usage of these is to submit statistics to objects
|
|
|
|
|
* that a flow relates to, although they may be used for other effects as well
|
|
|
|
|
* (for instance, refreshing hard timeouts for learned flows). */
|
|
|
|
|
struct xc_entry {
|
|
|
|
|
enum xc_type type;
|
|
|
|
|
union {
|
|
|
|
|
struct rule_dpif *rule;
|
|
|
|
|
struct {
|
|
|
|
|
struct netdev *tx;
|
|
|
|
|
struct netdev *rx;
|
|
|
|
|
struct bfd *bfd;
|
|
|
|
|
} dev;
|
|
|
|
|
struct {
|
|
|
|
|
struct netflow *netflow;
|
|
|
|
|
struct flow *flow;
|
|
|
|
|
ofp_port_t iface;
|
|
|
|
|
} nf;
|
|
|
|
|
struct {
|
|
|
|
|
struct mbridge *mbridge;
|
|
|
|
|
mirror_mask_t mirrors;
|
|
|
|
|
} mirror;
|
|
|
|
|
struct {
|
|
|
|
|
struct bond *bond;
|
|
|
|
|
struct flow *flow;
|
|
|
|
|
uint16_t vid;
|
|
|
|
|
} bond;
|
|
|
|
|
struct {
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:-
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache is re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
struct ofproto_dpif *ofproto;
|
|
|
|
|
struct ofputil_flow_mod *fm;
|
|
|
|
|
struct ofpbuf *ofpacts;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
} learn;
|
|
|
|
|
struct {
|
|
|
|
|
struct ofproto_dpif *ofproto;
|
|
|
|
|
struct flow *flow;
|
|
|
|
|
int vlan;
|
|
|
|
|
} normal;
|
|
|
|
|
struct {
|
|
|
|
|
struct rule_dpif *rule;
|
|
|
|
|
uint16_t idle;
|
|
|
|
|
uint16_t hard;
|
|
|
|
|
} fin;
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct {
|
|
|
|
|
struct group_dpif *group;
|
|
|
|
|
struct ofputil_bucket *bucket;
|
|
|
|
|
} group;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
struct {
|
|
|
|
|
char br_name[IFNAMSIZ];
|
2015-11-25 11:31:12 -02:00
|
|
|
|
struct in6_addr d_ipv6;
|
2015-11-30 16:24:49 -02:00
|
|
|
|
} tnl_neigh_cache;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
} u;
|
|
|
|
|
};
|
|
|
|
|
|
2016-01-15 23:24:59 +08:00
|
|
|
|
/* Iterates ENTRY over the entries stored in XCACHE.
 *
 *   - XCACHE is a pointer expression to an object with an 'entries' member
 *     (e.g. a 'struct xlate_cache *').  It is parenthesized in the
 *     expansion, so expressions such as '&cache' are accepted.
 *   - ENTRIES is an lvalue of the same type as that 'entries' member.  It is
 *     overwritten with a copy of XCACHE's 'entries' and is the object that
 *     ofpbuf_try_pull() is applied to on every step.
 *   - ENTRY is a pointer lvalue; each step pulls 'sizeof *ENTRY' bytes from
 *     ENTRIES, and the loop ends when ofpbuf_try_pull() yields NULL.
 *
 * This expands to an assignment statement followed by a 'for' statement,
 * so it must not be used as the unbraced body of an 'if', 'else', or loop:
 * only the assignment would be governed by the enclosing statement. */
#define XC_ENTRY_FOR_EACH(ENTRY, ENTRIES, XCACHE)                       \
    (ENTRIES) = (XCACHE)->entries;                                      \
    for ((ENTRY) = ofpbuf_try_pull(&(ENTRIES), sizeof *(ENTRY));        \
         (ENTRY);                                                       \
         (ENTRY) = ofpbuf_try_pull(&(ENTRIES), sizeof *(ENTRY)))
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
|
|
|
|
struct xlate_cache {
|
|
|
|
|
struct ofpbuf entries;
|
|
|
|
|
};
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
/* Xlate config contains hash maps of all bridges, bundles and ports.
|
|
|
|
|
* Xcfgp contains the pointer to the current xlate configuration.
|
|
|
|
|
* When the main thread needs to change the configuration, it copies xcfgp to
|
|
|
|
|
* new_xcfg and edits new_xcfg. This enables the use of RCU locking which
|
|
|
|
|
* does not block handler and revalidator threads. */
|
|
|
|
|
struct xlate_cfg {
|
|
|
|
|
struct hmap xbridges;
|
|
|
|
|
struct hmap xbundles;
|
|
|
|
|
struct hmap xports;
|
|
|
|
|
};
|
2014-10-27 10:57:28 -07:00
|
|
|
|
static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
|
2014-05-28 15:21:21 -07:00
|
|
|
|
static struct xlate_cfg *new_xcfg = NULL;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
static bool may_receive(const struct xport *, struct xlate_ctx *);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
|
|
|
|
|
struct xlate_ctx *);
|
2014-03-05 15:27:31 -08:00
|
|
|
|
static void xlate_normal(struct xlate_ctx *);
|
2015-05-10 00:04:25 -07:00
|
|
|
|
static inline void xlate_report(struct xlate_ctx *, const char *, ...)
|
|
|
|
|
OVS_PRINTF_FORMAT(2, 3);
|
2014-03-20 13:42:22 -07:00
|
|
|
|
static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
|
|
|
|
|
uint8_t table_id, bool may_packet_in,
|
|
|
|
|
bool honor_table_miss);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static bool input_vid_is_valid(uint16_t vid, struct xbundle *, bool warn);
|
|
|
|
|
static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid);
|
|
|
|
|
static void output_normal(struct xlate_ctx *, const struct xbundle *,
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint16_t vlan);
|
2015-03-12 13:02:07 -07:00
|
|
|
|
|
|
|
|
|
/* Optional bond recirculation parameter to compose_output_action(). */
struct xlate_bond_recirc {
    uint32_t recirc_id;   /* If nonzero, use recirculation instead of
                           * output. */
    uint8_t hash_alg;     /* If nonzero, compute hash for recirc before. */
    uint32_t hash_basis;  /* Compute hash for recirc before. */
};
|
|
|
|
|
|
|
|
|
|
static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
|
|
|
|
|
const struct xlate_bond_recirc *xr);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static struct xbridge *xbridge_lookup(struct xlate_cfg *,
|
|
|
|
|
const struct ofproto_dpif *);
|
2016-01-18 14:47:40 -08:00
|
|
|
|
static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
|
|
|
|
|
const struct uuid *);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static struct xbundle *xbundle_lookup(struct xlate_cfg *,
|
|
|
|
|
const struct ofbundle *);
|
|
|
|
|
static struct xport *xport_lookup(struct xlate_cfg *,
|
|
|
|
|
const struct ofport_dpif *);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
|
2013-07-06 10:25:06 -07:00
|
|
|
|
static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
|
|
|
|
|
uint32_t skb_priority);
|
|
|
|
|
static void clear_skb_priorities(struct xport *);
|
2014-09-17 15:01:48 -07:00
|
|
|
|
static size_t count_skb_priorities(const struct xport *);
|
2013-07-06 10:25:06 -07:00
|
|
|
|
static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
|
|
|
|
|
uint8_t *dscp);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2014-04-10 16:00:28 +12:00
|
|
|
|
static struct xc_entry *xlate_cache_add_entry(struct xlate_cache *xc,
|
|
|
|
|
enum xc_type type);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
|
|
|
|
|
static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
|
|
|
|
|
static void xlate_xport_init(struct xlate_cfg *, struct xport *);
|
2014-08-22 09:01:34 -07:00
|
|
|
|
static void xlate_xbridge_set(struct xbridge *, struct dpif *,
|
|
|
|
|
const struct mac_learning *, struct stp *,
|
|
|
|
|
struct rstp *, const struct mcast_snooping *,
|
|
|
|
|
const struct mbridge *,
|
|
|
|
|
const struct dpif_sflow *,
|
|
|
|
|
const struct dpif_ipfix *,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
const struct netflow *,
|
2014-05-27 17:34:14 -07:00
|
|
|
|
bool forward_bpdu, bool has_in_band,
|
2015-02-24 16:40:42 -08:00
|
|
|
|
const struct dpif_backer_support *);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void xlate_xbundle_set(struct xbundle *xbundle,
|
|
|
|
|
enum port_vlan_mode vlan_mode, int vlan,
|
|
|
|
|
unsigned long *trunks, bool use_priority_tags,
|
|
|
|
|
const struct bond *bond, const struct lacp *lacp,
|
|
|
|
|
bool floodable);
|
|
|
|
|
static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
|
|
|
|
|
const struct netdev *netdev, const struct cfm *cfm,
|
2015-02-20 14:17:10 -05:00
|
|
|
|
const struct bfd *bfd, const struct lldp *lldp,
|
|
|
|
|
int stp_port_no, const struct rstp_port *rstp_port,
|
2014-05-27 17:34:14 -07:00
|
|
|
|
enum ofputil_port_config config,
|
|
|
|
|
enum ofputil_port_state state, bool is_tunnel,
|
|
|
|
|
bool may_enable);
|
|
|
|
|
static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
|
|
|
|
|
static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
|
|
|
|
|
static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
|
|
|
|
|
static void xlate_xbridge_copy(struct xbridge *);
|
|
|
|
|
static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
|
|
|
|
|
static void xlate_xport_copy(struct xbridge *, struct xbundle *,
|
|
|
|
|
struct xport *);
|
|
|
|
|
static void xlate_xcfg_free(struct xlate_cfg *);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
2014-11-10 13:14:29 -08:00
|
|
|
|
static inline void
|
2015-05-10 00:04:25 -07:00
|
|
|
|
xlate_report(struct xlate_ctx *ctx, const char *format, ...)
|
2014-11-10 13:14:29 -08:00
|
|
|
|
{
|
|
|
|
|
if (OVS_UNLIKELY(ctx->xin->report_hook)) {
|
2015-05-10 00:04:25 -07:00
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
|
|
va_start(args, format);
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->xin->report_hook(ctx->xin, ctx->indentation, format, args);
|
2015-05-10 00:04:25 -07:00
|
|
|
|
va_end(args);
|
2014-11-10 13:14:29 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-05-27 17:34:14 -07:00
|
|
|
|
|
2015-11-25 15:19:37 -08:00
|
|
|
|
/* Rate limiter for the log messages emitted by XLATE_REPORT_ERROR. */
static struct vlog_rate_limit error_report_rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Reports a translation error described by the printf-style arguments.
 *
 * If CTX's 'xin' has a report hook, the message goes to xlate_report();
 * otherwise it is logged at error level, rate-limited by 'error_report_rl'.
 * CTX may be evaluated more than once, so it should not have side
 * effects. */
#define XLATE_REPORT_ERROR(CTX, ...)                            \
    do {                                                        \
        if (OVS_UNLIKELY((CTX)->xin->report_hook)) {            \
            xlate_report(CTX, __VA_ARGS__);                     \
        } else {                                                \
            VLOG_ERR_RL(&error_report_rl, __VA_ARGS__);         \
        }                                                       \
    } while (0)
|
|
|
|
|
|
2015-07-29 22:03:31 -07:00
|
|
|
|
static inline void
|
|
|
|
|
xlate_report_actions(struct xlate_ctx *ctx, const char *title,
|
|
|
|
|
const struct ofpact *ofpacts, size_t ofpacts_len)
|
|
|
|
|
{
|
|
|
|
|
if (OVS_UNLIKELY(ctx->xin->report_hook)) {
|
|
|
|
|
struct ds s = DS_EMPTY_INITIALIZER;
|
|
|
|
|
ofpacts_format(ofpacts, ofpacts_len, &s);
|
|
|
|
|
xlate_report(ctx, "%s: %s", title, ds_cstr(&s));
|
|
|
|
|
ds_destroy(&s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
|
|
|
|
|
{
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_init(&xbridge->xbundles);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
hmap_init(&xbridge->xports);
|
|
|
|
|
hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
|
|
|
|
|
hash_pointer(xbridge->ofproto, 0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
|
|
|
|
|
{
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_init(&xbundle->xports);
|
|
|
|
|
ovs_list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
|
|
|
|
|
hash_pointer(xbundle->ofbundle, 0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
|
|
|
|
|
{
|
|
|
|
|
hmap_init(&xport->skb_priorities);
|
|
|
|
|
hmap_insert(&xcfg->xports, &xport->hmap_node,
|
|
|
|
|
hash_pointer(xport->ofport, 0));
|
|
|
|
|
hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
|
|
|
|
|
hash_ofp_port(xport->ofp_port));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xbridge_set(struct xbridge *xbridge,
|
|
|
|
|
struct dpif *dpif,
|
2013-07-25 17:20:53 -07:00
|
|
|
|
const struct mac_learning *ml, struct stp *stp,
|
2014-08-22 09:01:34 -07:00
|
|
|
|
struct rstp *rstp, const struct mcast_snooping *ms,
|
2013-07-25 17:20:53 -07:00
|
|
|
|
const struct mbridge *mbridge,
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct dpif_sflow *sflow,
|
2013-10-30 16:29:58 -07:00
|
|
|
|
const struct dpif_ipfix *ipfix,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
const struct netflow *netflow,
|
2013-12-30 14:49:25 -08:00
|
|
|
|
bool forward_bpdu, bool has_in_band,
|
2015-02-24 16:40:42 -08:00
|
|
|
|
const struct dpif_backer_support *support)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
|
|
|
|
if (xbridge->ml != ml) {
|
|
|
|
|
mac_learning_unref(xbridge->ml);
|
|
|
|
|
xbridge->ml = mac_learning_ref(ml);
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-18 22:14:31 -03:00
|
|
|
|
if (xbridge->ms != ms) {
|
|
|
|
|
mcast_snooping_unref(xbridge->ms);
|
|
|
|
|
xbridge->ms = mcast_snooping_ref(ms);
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xbridge->mbridge != mbridge) {
|
|
|
|
|
mbridge_unref(xbridge->mbridge);
|
|
|
|
|
xbridge->mbridge = mbridge_ref(mbridge);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xbridge->sflow != sflow) {
|
|
|
|
|
dpif_sflow_unref(xbridge->sflow);
|
|
|
|
|
xbridge->sflow = dpif_sflow_ref(sflow);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xbridge->ipfix != ipfix) {
|
|
|
|
|
dpif_ipfix_unref(xbridge->ipfix);
|
|
|
|
|
xbridge->ipfix = dpif_ipfix_ref(ipfix);
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 09:31:35 -07:00
|
|
|
|
if (xbridge->stp != stp) {
|
|
|
|
|
stp_unref(xbridge->stp);
|
|
|
|
|
xbridge->stp = stp_ref(stp);
|
|
|
|
|
}
|
|
|
|
|
|
2014-08-22 09:01:34 -07:00
|
|
|
|
if (xbridge->rstp != rstp) {
|
|
|
|
|
rstp_unref(xbridge->rstp);
|
|
|
|
|
xbridge->rstp = rstp_ref(rstp);
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 16:29:58 -07:00
|
|
|
|
if (xbridge->netflow != netflow) {
|
|
|
|
|
netflow_unref(xbridge->netflow);
|
|
|
|
|
xbridge->netflow = netflow_ref(netflow);
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 11:46:48 -07:00
|
|
|
|
xbridge->dpif = dpif;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xbridge->forward_bpdu = forward_bpdu;
|
|
|
|
|
xbridge->has_in_band = has_in_band;
|
2015-02-24 16:40:42 -08:00
|
|
|
|
xbridge->support = *support;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_xbundle_set(struct xbundle *xbundle,
|
|
|
|
|
enum port_vlan_mode vlan_mode, int vlan,
|
|
|
|
|
unsigned long *trunks, bool use_priority_tags,
|
|
|
|
|
const struct bond *bond, const struct lacp *lacp,
|
|
|
|
|
bool floodable)
|
|
|
|
|
{
|
|
|
|
|
ovs_assert(xbundle->xbridge);
|
|
|
|
|
|
|
|
|
|
xbundle->vlan_mode = vlan_mode;
|
|
|
|
|
xbundle->vlan = vlan;
|
|
|
|
|
xbundle->trunks = trunks;
|
|
|
|
|
xbundle->use_priority_tags = use_priority_tags;
|
|
|
|
|
xbundle->floodable = floodable;
|
|
|
|
|
|
|
|
|
|
if (xbundle->bond != bond) {
|
|
|
|
|
bond_unref(xbundle->bond);
|
|
|
|
|
xbundle->bond = bond_ref(bond);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xbundle->lacp != lacp) {
|
|
|
|
|
lacp_unref(xbundle->lacp);
|
|
|
|
|
xbundle->lacp = lacp_ref(lacp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xport_set(struct xport *xport, odp_port_t odp_port,
|
|
|
|
|
const struct netdev *netdev, const struct cfm *cfm,
|
2015-02-20 14:17:10 -05:00
|
|
|
|
const struct bfd *bfd, const struct lldp *lldp, int stp_port_no,
|
2014-09-09 11:11:18 -07:00
|
|
|
|
const struct rstp_port* rstp_port,
|
2014-05-27 17:34:14 -07:00
|
|
|
|
enum ofputil_port_config config, enum ofputil_port_state state,
|
|
|
|
|
bool is_tunnel, bool may_enable)
|
|
|
|
|
{
|
|
|
|
|
xport->config = config;
|
|
|
|
|
xport->state = state;
|
|
|
|
|
xport->stp_port_no = stp_port_no;
|
|
|
|
|
xport->is_tunnel = is_tunnel;
|
|
|
|
|
xport->may_enable = may_enable;
|
|
|
|
|
xport->odp_port = odp_port;
|
|
|
|
|
|
2014-09-09 11:11:18 -07:00
|
|
|
|
if (xport->rstp_port != rstp_port) {
|
|
|
|
|
rstp_port_unref(xport->rstp_port);
|
|
|
|
|
xport->rstp_port = rstp_port_ref(rstp_port);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
if (xport->cfm != cfm) {
|
|
|
|
|
cfm_unref(xport->cfm);
|
|
|
|
|
xport->cfm = cfm_ref(cfm);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xport->bfd != bfd) {
|
|
|
|
|
bfd_unref(xport->bfd);
|
|
|
|
|
xport->bfd = bfd_ref(bfd);
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-20 14:17:10 -05:00
|
|
|
|
if (xport->lldp != lldp) {
|
|
|
|
|
lldp_unref(xport->lldp);
|
|
|
|
|
xport->lldp = lldp_ref(lldp);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
if (xport->netdev != netdev) {
|
|
|
|
|
netdev_close(xport->netdev);
|
|
|
|
|
xport->netdev = netdev_ref(netdev);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xbridge_copy(struct xbridge *xbridge)
|
|
|
|
|
{
|
|
|
|
|
struct xbundle *xbundle;
|
|
|
|
|
struct xport *xport;
|
|
|
|
|
struct xbridge *new_xbridge = xzalloc(sizeof *xbridge);
|
|
|
|
|
new_xbridge->ofproto = xbridge->ofproto;
|
|
|
|
|
new_xbridge->name = xstrdup(xbridge->name);
|
|
|
|
|
xlate_xbridge_init(new_xcfg, new_xbridge);
|
|
|
|
|
|
|
|
|
|
xlate_xbridge_set(new_xbridge,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
xbridge->dpif, xbridge->ml, xbridge->stp,
|
2014-08-22 09:01:34 -07:00
|
|
|
|
xbridge->rstp, xbridge->ms, xbridge->mbridge,
|
|
|
|
|
xbridge->sflow, xbridge->ipfix, xbridge->netflow,
|
2015-02-24 16:40:42 -08:00
|
|
|
|
xbridge->forward_bpdu, xbridge->has_in_band,
|
|
|
|
|
&xbridge->support);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
|
|
|
|
|
xlate_xbundle_copy(new_xbridge, xbundle);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Copy xports which are not part of a xbundle */
|
|
|
|
|
HMAP_FOR_EACH (xport, ofp_node, &xbridge->xports) {
|
|
|
|
|
if (!xport->xbundle) {
|
|
|
|
|
xlate_xport_copy(new_xbridge, NULL, xport);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xbundle_copy(struct xbridge *xbridge, struct xbundle *xbundle)
|
|
|
|
|
{
|
|
|
|
|
struct xport *xport;
|
|
|
|
|
struct xbundle *new_xbundle = xzalloc(sizeof *xbundle);
|
|
|
|
|
new_xbundle->ofbundle = xbundle->ofbundle;
|
|
|
|
|
new_xbundle->xbridge = xbridge;
|
|
|
|
|
new_xbundle->name = xstrdup(xbundle->name);
|
|
|
|
|
xlate_xbundle_init(new_xcfg, new_xbundle);
|
|
|
|
|
|
|
|
|
|
xlate_xbundle_set(new_xbundle, xbundle->vlan_mode,
|
|
|
|
|
xbundle->vlan, xbundle->trunks,
|
|
|
|
|
xbundle->use_priority_tags, xbundle->bond, xbundle->lacp,
|
|
|
|
|
xbundle->floodable);
|
|
|
|
|
LIST_FOR_EACH (xport, bundle_node, &xbundle->xports) {
|
|
|
|
|
xlate_xport_copy(xbridge, new_xbundle, xport);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
|
|
|
|
|
struct xport *xport)
|
|
|
|
|
{
|
|
|
|
|
struct skb_priority_to_dscp *pdscp, *new_pdscp;
|
|
|
|
|
struct xport *new_xport = xzalloc(sizeof *xport);
|
|
|
|
|
new_xport->ofport = xport->ofport;
|
|
|
|
|
new_xport->ofp_port = xport->ofp_port;
|
|
|
|
|
new_xport->xbridge = xbridge;
|
|
|
|
|
xlate_xport_init(new_xcfg, new_xport);
|
|
|
|
|
|
|
|
|
|
xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
|
2015-02-20 14:17:10 -05:00
|
|
|
|
xport->bfd, xport->lldp, xport->stp_port_no,
|
|
|
|
|
xport->rstp_port, xport->config, xport->state,
|
|
|
|
|
xport->is_tunnel, xport->may_enable);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
|
|
|
|
|
if (xport->peer) {
|
|
|
|
|
struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
|
|
|
|
|
if (peer) {
|
|
|
|
|
new_xport->peer = peer;
|
|
|
|
|
new_xport->peer->peer = new_xport;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xbundle) {
|
|
|
|
|
new_xport->xbundle = xbundle;
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
|
|
|
|
|
new_pdscp = xmalloc(sizeof *pdscp);
|
|
|
|
|
new_pdscp->skb_priority = pdscp->skb_priority;
|
|
|
|
|
new_pdscp->dscp = pdscp->dscp;
|
|
|
|
|
hmap_insert(&new_xport->skb_priorities, &new_pdscp->hmap_node,
|
|
|
|
|
hash_int(new_pdscp->skb_priority, 0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sets the current xlate configuration to new_xcfg and frees the old xlate
|
|
|
|
|
* configuration in xcfgp.
|
|
|
|
|
*
|
|
|
|
|
* This needs to be called after editing the xlate configuration.
|
|
|
|
|
*
|
|
|
|
|
* Functions that edit the new xlate configuration are
|
|
|
|
|
* xlate_<ofport/bundle/ofport>_set and xlate_<ofport/bundle/ofport>_remove.
|
|
|
|
|
*
|
|
|
|
|
* A sample workflow:
|
|
|
|
|
*
|
|
|
|
|
* xlate_txn_start();
|
|
|
|
|
* ...
|
|
|
|
|
* edit_xlate_configuration();
|
|
|
|
|
* ...
|
|
|
|
|
* xlate_txn_commit(); */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
void
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_txn_commit(void)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
|
|
|
|
|
ovsrcu_set(&xcfgp, new_xcfg);
|
2014-11-07 13:02:05 -08:00
|
|
|
|
ovsrcu_synchronize();
|
|
|
|
|
xlate_xcfg_free(xcfg);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
new_xcfg = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Copies the current xlate configuration in xcfgp to new_xcfg.
|
|
|
|
|
*
|
|
|
|
|
* This needs to be called prior to editing the xlate configuration. */
|
|
|
|
|
void
|
|
|
|
|
xlate_txn_start(void)
|
|
|
|
|
{
|
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
struct xlate_cfg *xcfg;
|
|
|
|
|
|
|
|
|
|
ovs_assert(!new_xcfg);
|
|
|
|
|
|
|
|
|
|
new_xcfg = xmalloc(sizeof *new_xcfg);
|
|
|
|
|
hmap_init(&new_xcfg->xbridges);
|
|
|
|
|
hmap_init(&new_xcfg->xbundles);
|
|
|
|
|
hmap_init(&new_xcfg->xports);
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
if (!xcfg) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
|
|
|
|
|
xlate_xbridge_copy(xbridge);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xcfg_free(struct xlate_cfg *xcfg)
|
|
|
|
|
{
|
|
|
|
|
struct xbridge *xbridge, *next_xbridge;
|
|
|
|
|
|
|
|
|
|
if (!xcfg) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) {
|
|
|
|
|
xlate_xbridge_remove(xcfg, xbridge);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hmap_destroy(&xcfg->xbridges);
|
|
|
|
|
hmap_destroy(&xcfg->xbundles);
|
|
|
|
|
hmap_destroy(&xcfg->xports);
|
|
|
|
|
free(xcfg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
struct dpif *dpif,
|
2014-05-27 17:34:14 -07:00
|
|
|
|
const struct mac_learning *ml, struct stp *stp,
|
2014-08-22 09:01:34 -07:00
|
|
|
|
struct rstp *rstp, const struct mcast_snooping *ms,
|
2014-05-27 17:34:14 -07:00
|
|
|
|
const struct mbridge *mbridge,
|
|
|
|
|
const struct dpif_sflow *sflow,
|
|
|
|
|
const struct dpif_ipfix *ipfix,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
const struct netflow *netflow,
|
2015-02-24 16:40:42 -08:00
|
|
|
|
bool forward_bpdu, bool has_in_band,
|
|
|
|
|
const struct dpif_backer_support *support)
|
2014-05-27 17:34:14 -07:00
|
|
|
|
{
|
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
|
|
|
|
|
ovs_assert(new_xcfg);
|
|
|
|
|
|
|
|
|
|
xbridge = xbridge_lookup(new_xcfg, ofproto);
|
|
|
|
|
if (!xbridge) {
|
|
|
|
|
xbridge = xzalloc(sizeof *xbridge);
|
|
|
|
|
xbridge->ofproto = ofproto;
|
|
|
|
|
|
|
|
|
|
xlate_xbridge_init(new_xcfg, xbridge);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(xbridge->name);
|
|
|
|
|
xbridge->name = xstrdup(name);
|
|
|
|
|
|
2014-11-10 13:14:29 -08:00
|
|
|
|
xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix,
|
2015-02-24 16:40:42 -08:00
|
|
|
|
netflow, forward_bpdu, has_in_band, support);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
|
|
|
|
struct xbundle *xbundle, *next_xbundle;
|
|
|
|
|
struct xport *xport, *next_xport;
|
|
|
|
|
|
|
|
|
|
if (!xbridge) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) {
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_xport_remove(xcfg, xport);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) {
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_xbundle_remove(xcfg, xbundle);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
hmap_remove(&xcfg->xbridges, &xbridge->hmap_node);
|
2013-08-15 18:37:41 -07:00
|
|
|
|
mac_learning_unref(xbridge->ml);
|
2014-06-18 22:14:31 -03:00
|
|
|
|
mcast_snooping_unref(xbridge->ms);
|
2013-08-15 18:37:41 -07:00
|
|
|
|
mbridge_unref(xbridge->mbridge);
|
|
|
|
|
dpif_sflow_unref(xbridge->sflow);
|
|
|
|
|
dpif_ipfix_unref(xbridge->ipfix);
|
|
|
|
|
stp_unref(xbridge->stp);
|
2014-08-22 09:01:34 -07:00
|
|
|
|
rstp_unref(xbridge->rstp);
|
2013-08-15 18:37:41 -07:00
|
|
|
|
hmap_destroy(&xbridge->xports);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
free(xbridge->name);
|
|
|
|
|
free(xbridge);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
void
|
|
|
|
|
xlate_remove_ofproto(struct ofproto_dpif *ofproto)
|
|
|
|
|
{
|
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
|
|
|
|
|
ovs_assert(new_xcfg);
|
|
|
|
|
|
|
|
|
|
xbridge = xbridge_lookup(new_xcfg, ofproto);
|
|
|
|
|
xlate_xbridge_remove(new_xcfg, xbridge);
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
void
|
|
|
|
|
xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
|
|
|
|
|
const char *name, enum port_vlan_mode vlan_mode, int vlan,
|
|
|
|
|
unsigned long *trunks, bool use_priority_tags,
|
|
|
|
|
const struct bond *bond, const struct lacp *lacp,
|
|
|
|
|
bool floodable)
|
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xbundle *xbundle;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
ovs_assert(new_xcfg);
|
|
|
|
|
|
|
|
|
|
xbundle = xbundle_lookup(new_xcfg, ofbundle);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!xbundle) {
|
|
|
|
|
xbundle = xzalloc(sizeof *xbundle);
|
|
|
|
|
xbundle->ofbundle = ofbundle;
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbundle->xbridge = xbridge_lookup(new_xcfg, ofproto);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_xbundle_init(new_xcfg, xbundle);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(xbundle->name);
|
|
|
|
|
xbundle->name = xstrdup(name);
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_xbundle_set(xbundle, vlan_mode, vlan, trunks,
|
|
|
|
|
use_priority_tags, bond, lacp, floodable);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
2015-04-06 14:02:28 -07:00
|
|
|
|
struct xport *xport;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
if (!xbundle) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 14:02:28 -07:00
|
|
|
|
LIST_FOR_EACH_POP (xport, bundle_node, &xbundle->xports) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xport->xbundle = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_remove(&xbundle->list_node);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
bond_unref(xbundle->bond);
|
|
|
|
|
lacp_unref(xbundle->lacp);
|
|
|
|
|
free(xbundle->name);
|
|
|
|
|
free(xbundle);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
void
|
|
|
|
|
xlate_bundle_remove(struct ofbundle *ofbundle)
|
|
|
|
|
{
|
|
|
|
|
struct xbundle *xbundle;
|
|
|
|
|
|
|
|
|
|
ovs_assert(new_xcfg);
|
|
|
|
|
|
|
|
|
|
xbundle = xbundle_lookup(new_xcfg, ofbundle);
|
|
|
|
|
xlate_xbundle_remove(new_xcfg, xbundle);
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
void
|
|
|
|
|
xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
|
|
|
|
|
struct ofport_dpif *ofport, ofp_port_t ofp_port,
|
|
|
|
|
odp_port_t odp_port, const struct netdev *netdev,
|
|
|
|
|
const struct cfm *cfm, const struct bfd *bfd,
|
2015-02-20 14:17:10 -05:00
|
|
|
|
const struct lldp *lldp, struct ofport_dpif *peer,
|
|
|
|
|
int stp_port_no, const struct rstp_port *rstp_port,
|
2013-07-06 10:25:06 -07:00
|
|
|
|
const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
|
2013-10-30 18:17:18 +09:00
|
|
|
|
enum ofputil_port_config config,
|
|
|
|
|
enum ofputil_port_state state, bool is_tunnel,
|
2013-07-06 09:31:35 -07:00
|
|
|
|
bool may_enable)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
2013-07-06 10:25:06 -07:00
|
|
|
|
size_t i;
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xport *xport;
|
|
|
|
|
|
|
|
|
|
ovs_assert(new_xcfg);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport = xport_lookup(new_xcfg, ofport);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!xport) {
|
|
|
|
|
xport = xzalloc(sizeof *xport);
|
|
|
|
|
xport->ofport = ofport;
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport->xbridge = xbridge_lookup(new_xcfg, ofproto);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xport->ofp_port = ofp_port;
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_xport_init(new_xcfg, xport);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ovs_assert(xport->ofp_port == ofp_port);
|
|
|
|
|
|
2015-02-20 14:17:10 -05:00
|
|
|
|
xlate_xport_set(xport, odp_port, netdev, cfm, bfd, lldp,
|
|
|
|
|
stp_port_no, rstp_port, config, state, is_tunnel,
|
|
|
|
|
may_enable);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
if (xport->peer) {
|
|
|
|
|
xport->peer->peer = NULL;
|
|
|
|
|
}
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport->peer = xport_lookup(new_xcfg, peer);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport->peer) {
|
|
|
|
|
xport->peer->peer = xport;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xport->xbundle) {
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_remove(&xport->bundle_node);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport->xbundle) {
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_insert(&xport->xbundle->xports, &xport->bundle_node);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
2013-07-06 10:25:06 -07:00
|
|
|
|
|
|
|
|
|
clear_skb_priorities(xport);
|
|
|
|
|
for (i = 0; i < n_qdscp; i++) {
|
|
|
|
|
struct skb_priority_to_dscp *pdscp;
|
|
|
|
|
uint32_t skb_priority;
|
|
|
|
|
|
2013-07-06 11:46:48 -07:00
|
|
|
|
if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
|
|
|
|
|
&skb_priority)) {
|
2013-07-06 10:25:06 -07:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pdscp = xmalloc(sizeof *pdscp);
|
|
|
|
|
pdscp->skb_priority = skb_priority;
|
|
|
|
|
pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
|
|
|
|
|
hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
|
|
|
|
|
hash_int(pdscp->skb_priority, 0));
|
|
|
|
|
}
|
2013-06-13 18:38:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_xport_remove(struct xlate_cfg *xcfg, struct xport *xport)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
|
|
|
|
if (!xport) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (xport->peer) {
|
|
|
|
|
xport->peer->peer = NULL;
|
|
|
|
|
xport->peer = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-22 12:54:09 -07:00
|
|
|
|
if (xport->xbundle) {
|
2016-03-25 14:10:22 -07:00
|
|
|
|
ovs_list_remove(&xport->bundle_node);
|
2013-07-22 12:54:09 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 10:25:06 -07:00
|
|
|
|
clear_skb_priorities(xport);
|
|
|
|
|
hmap_destroy(&xport->skb_priorities);
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
hmap_remove(&xcfg->xports, &xport->hmap_node);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
|
|
|
|
|
|
|
|
|
|
netdev_close(xport->netdev);
|
2014-09-09 11:11:18 -07:00
|
|
|
|
rstp_port_unref(xport->rstp_port);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
cfm_unref(xport->cfm);
|
|
|
|
|
bfd_unref(xport->bfd);
|
2015-02-20 14:17:10 -05:00
|
|
|
|
lldp_unref(xport->lldp);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
free(xport);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
void
|
|
|
|
|
xlate_ofport_remove(struct ofport_dpif *ofport)
|
|
|
|
|
{
|
|
|
|
|
struct xport *xport;
|
|
|
|
|
|
|
|
|
|
ovs_assert(new_xcfg);
|
|
|
|
|
|
|
|
|
|
xport = xport_lookup(new_xcfg, ofport);
|
|
|
|
|
xlate_xport_remove(new_xcfg, xport);
|
|
|
|
|
}
|
|
|
|
|
|
2014-08-22 09:01:36 -07:00
|
|
|
|
static struct ofproto_dpif *
|
|
|
|
|
xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
|
|
|
|
|
ofp_port_t *ofp_in_port, const struct xport **xportp)
|
|
|
|
|
{
|
2015-03-26 11:18:16 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2014-08-22 09:01:36 -07:00
|
|
|
|
const struct xport *xport;
|
2014-12-16 18:47:27 -08:00
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
|
|
|
|
|
? tnl_port_receive(flow)
|
|
|
|
|
: odp_port_to_ofport(backer, flow->in_port.odp_port));
|
|
|
|
|
if (OVS_UNLIKELY(!xport)) {
|
|
|
|
|
return NULL;
|
2014-08-22 09:01:36 -07:00
|
|
|
|
}
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*xportp = xport;
|
2014-12-16 18:47:27 -08:00
|
|
|
|
if (ofp_in_port) {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
*ofp_in_port = xport->ofp_port;
|
2014-12-16 18:47:27 -08:00
|
|
|
|
}
|
2015-03-26 11:18:16 -07:00
|
|
|
|
return xport->xbridge->ofproto;
|
2014-08-22 09:01:36 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Given a datapath and flow metadata ('backer', and 'flow' respectively)
|
|
|
|
|
* returns the corresponding struct ofproto_dpif and OpenFlow port number. */
|
|
|
|
|
struct ofproto_dpif *
|
|
|
|
|
xlate_lookup_ofproto(const struct dpif_backer *backer, const struct flow *flow,
|
|
|
|
|
ofp_port_t *ofp_in_port)
|
|
|
|
|
{
|
|
|
|
|
const struct xport *xport;
|
|
|
|
|
|
|
|
|
|
return xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
|
|
|
|
|
}
|
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
/* Given a datapath and flow metadata ('backer', and 'flow' respectively),
|
2014-08-22 09:01:36 -07:00
|
|
|
|
* optionally populates 'ofproto' with the ofproto_dpif, 'ofp_in_port' with the
|
2014-08-06 18:49:44 -07:00
|
|
|
|
* openflow in_port, and 'ipfix', 'sflow', and 'netflow' with the appropriate
|
2014-08-22 09:01:36 -07:00
|
|
|
|
* handles for those protocols if they're enabled. Caller may use the returned
|
|
|
|
|
* pointers until quiescing, for longer term use additional references must
|
|
|
|
|
* be taken.
|
2013-08-02 12:43:03 -07:00
|
|
|
|
*
|
2014-12-16 18:47:27 -08:00
|
|
|
|
* Returns 0 if successful, ENODEV if the parsed flow has no associated ofproto.
|
2014-08-22 09:01:36 -07:00
|
|
|
|
*/
|
2013-08-02 12:43:03 -07:00
|
|
|
|
int
|
2014-08-22 09:01:36 -07:00
|
|
|
|
xlate_lookup(const struct dpif_backer *backer, const struct flow *flow,
|
|
|
|
|
struct ofproto_dpif **ofprotop, struct dpif_ipfix **ipfix,
|
|
|
|
|
struct dpif_sflow **sflow, struct netflow **netflow,
|
|
|
|
|
ofp_port_t *ofp_in_port)
|
2013-08-02 12:43:03 -07:00
|
|
|
|
{
|
2014-08-22 09:01:36 -07:00
|
|
|
|
struct ofproto_dpif *ofproto;
|
2014-05-27 17:34:14 -07:00
|
|
|
|
const struct xport *xport;
|
2013-08-02 12:43:03 -07:00
|
|
|
|
|
2014-08-22 09:01:36 -07:00
|
|
|
|
ofproto = xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
|
2013-08-02 12:43:03 -07:00
|
|
|
|
|
2014-12-16 18:47:27 -08:00
|
|
|
|
if (!ofproto) {
|
2014-08-06 18:49:44 -07:00
|
|
|
|
return ENODEV;
|
2013-08-02 12:43:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-08-22 09:01:36 -07:00
|
|
|
|
if (ofprotop) {
|
|
|
|
|
*ofprotop = ofproto;
|
2013-08-02 12:43:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-31 16:23:13 -07:00
|
|
|
|
if (ipfix) {
|
2014-12-16 18:47:27 -08:00
|
|
|
|
*ipfix = xport ? xport->xbridge->ipfix : NULL;
|
2013-10-31 16:23:13 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (sflow) {
|
2014-12-16 18:47:27 -08:00
|
|
|
|
*sflow = xport ? xport->xbridge->sflow : NULL;
|
2013-10-31 16:23:13 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (netflow) {
|
2014-12-16 18:47:27 -08:00
|
|
|
|
*netflow = xport ? xport->xbridge->netflow : NULL;
|
2013-10-31 16:23:13 -07:00
|
|
|
|
}
|
2014-12-16 18:47:27 -08:00
|
|
|
|
|
2014-08-06 18:49:44 -07:00
|
|
|
|
return 0;
|
2013-08-02 12:43:03 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static struct xbridge *
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct hmap *xbridges;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
if (!ofproto || !xcfg) {
|
2013-08-02 19:31:02 -07:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbridges = &xcfg->xbridges;
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbridges) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xbridge->ofproto == ofproto) {
|
|
|
|
|
return xbridge;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2016-01-18 14:47:40 -08:00
|
|
|
|
static struct xbridge *
|
|
|
|
|
xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
|
|
|
|
|
{
|
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
|
|
|
|
|
if (uuid_equals(ofproto_dpif_get_uuid(xbridge->ofproto), uuid)) {
|
|
|
|
|
return xbridge;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static struct xbundle *
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct hmap *xbundles;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbundle *xbundle;
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
if (!ofbundle || !xcfg) {
|
2013-08-02 19:31:02 -07:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbundles = &xcfg->xbundles;
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbundles) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xbundle->ofbundle == ofbundle) {
|
|
|
|
|
return xbundle;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct xport *
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct hmap *xports;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xport *xport;
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
if (!ofport || !xcfg) {
|
2013-08-02 19:31:02 -07:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xports = &xcfg->xports;
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xports) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport->ofport == ofport) {
|
|
|
|
|
return xport;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-02 14:55:31 -07:00
|
|
|
|
static struct stp_port *
|
|
|
|
|
xport_get_stp_port(const struct xport *xport)
|
|
|
|
|
{
|
2013-09-04 15:21:15 -07:00
|
|
|
|
return xport->xbridge->stp && xport->stp_port_no != -1
|
2013-08-02 14:55:31 -07:00
|
|
|
|
? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
|
|
|
|
|
: NULL;
|
|
|
|
|
}
|
2013-07-06 09:31:35 -07:00
|
|
|
|
|
2014-03-09 17:48:52 +08:00
|
|
|
|
/* Returns whether STP permits MAC learning on 'xport'.  A port without an
 * STP port is always allowed to learn. */
static bool
xport_stp_learn_state(const struct xport *xport)
{
    struct stp_port *stp_port = xport_get_stp_port(xport);

    if (!stp_port) {
        return true;
    }
    return stp_learn_in_state(stp_port_get_state(stp_port));
}
|
|
|
|
|
|
|
|
|
|
/* Returns whether STP permits forwarding on 'xport'.  A port without an STP
 * port is always allowed to forward. */
static bool
xport_stp_forward_state(const struct xport *xport)
{
    struct stp_port *stp_port = xport_get_stp_port(xport);

    if (!stp_port) {
        return true;
    }
    return stp_forward_in_state(stp_port_get_state(stp_port));
}
|
|
|
|
|
|
2014-03-09 17:48:52 +08:00
|
|
|
|
static bool
|
2014-07-15 18:52:19 -07:00
|
|
|
|
xport_stp_should_forward_bpdu(const struct xport *xport)
|
2014-03-09 17:48:52 +08:00
|
|
|
|
{
|
|
|
|
|
struct stp_port *sp = xport_get_stp_port(xport);
|
2014-07-15 18:52:19 -07:00
|
|
|
|
return stp_should_forward_bpdu(sp ? stp_port_get_state(sp) : STP_DISABLED);
|
2014-03-09 17:48:52 +08:00
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 09:31:35 -07:00
|
|
|
|
/* Returns true if STP should process 'flow'. Sets fields in 'wc' that
|
|
|
|
|
* were used to make the determination.*/
|
|
|
|
|
static bool
|
|
|
|
|
stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
|
|
|
|
|
{
|
2014-04-24 13:18:18 -07:00
|
|
|
|
/* is_stp() also checks dl_type, but dl_type is always set in 'wc'. */
|
2013-07-06 09:31:35 -07:00
|
|
|
|
memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
|
2014-04-24 13:18:18 -07:00
|
|
|
|
return is_stp(flow);
|
2013-07-06 09:31:35 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2015-02-22 03:21:09 -08:00
|
|
|
|
stp_process_packet(const struct xport *xport, const struct dp_packet *packet)
|
2013-07-06 09:31:35 -07:00
|
|
|
|
{
|
2013-08-02 14:55:31 -07:00
|
|
|
|
struct stp_port *sp = xport_get_stp_port(xport);
|
2015-02-22 03:21:09 -08:00
|
|
|
|
struct dp_packet payload = *packet;
|
|
|
|
|
struct eth_header *eth = dp_packet_data(&payload);
|
2013-07-06 09:31:35 -07:00
|
|
|
|
|
|
|
|
|
/* Sink packets on ports that have STP disabled when the bridge has
|
|
|
|
|
* STP enabled. */
|
|
|
|
|
if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Trim off padding on payload. */
|
2015-02-22 03:21:09 -08:00
|
|
|
|
if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
|
|
|
|
|
dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
|
2013-07-06 09:31:35 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-22 03:21:09 -08:00
|
|
|
|
if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
|
|
|
|
|
stp_received_bpdu(sp, dp_packet_data(&payload), dp_packet_size(&payload));
|
2013-07-06 09:31:35 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-09-09 11:11:18 -07:00
|
|
|
|
static enum rstp_state
|
|
|
|
|
xport_get_rstp_port_state(const struct xport *xport)
|
2014-08-22 09:01:34 -07:00
|
|
|
|
{
|
2014-09-09 11:11:18 -07:00
|
|
|
|
return xport->rstp_port
|
|
|
|
|
? rstp_port_get_state(xport->rstp_port)
|
|
|
|
|
: RSTP_DISABLED;
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
xport_rstp_learn_state(const struct xport *xport)
|
|
|
|
|
{
|
2014-11-14 14:07:50 -08:00
|
|
|
|
return xport->xbridge->rstp && xport->rstp_port
|
|
|
|
|
? rstp_learn_in_state(xport_get_rstp_port_state(xport))
|
|
|
|
|
: true;
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
xport_rstp_forward_state(const struct xport *xport)
|
|
|
|
|
{
|
2014-11-14 14:07:50 -08:00
|
|
|
|
return xport->xbridge->rstp && xport->rstp_port
|
|
|
|
|
? rstp_forward_in_state(xport_get_rstp_port_state(xport))
|
|
|
|
|
: true;
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
xport_rstp_should_manage_bpdu(const struct xport *xport)
|
|
|
|
|
{
|
2014-09-09 11:11:18 -07:00
|
|
|
|
return rstp_should_manage_bpdu(xport_get_rstp_port_state(xport));
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2015-02-22 03:21:09 -08:00
|
|
|
|
rstp_process_packet(const struct xport *xport, const struct dp_packet *packet)
|
2014-08-22 09:01:34 -07:00
|
|
|
|
{
|
2015-02-22 03:21:09 -08:00
|
|
|
|
struct dp_packet payload = *packet;
|
|
|
|
|
struct eth_header *eth = dp_packet_data(&payload);
|
2014-08-22 09:01:34 -07:00
|
|
|
|
|
2014-09-09 11:11:18 -07:00
|
|
|
|
/* Sink packets on ports that have no RSTP. */
|
|
|
|
|
if (!xport->rstp_port) {
|
2014-08-22 09:01:34 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Trim off padding on payload. */
|
2015-02-22 03:21:09 -08:00
|
|
|
|
if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
|
|
|
|
|
dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-22 03:21:09 -08:00
|
|
|
|
if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
|
|
|
|
|
rstp_port_received_bpdu(xport->rstp_port, dp_packet_data(&payload),
|
|
|
|
|
dp_packet_size(&payload));
|
2014-08-22 09:01:34 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static struct xport *
|
|
|
|
|
get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
|
|
|
|
|
{
|
|
|
|
|
struct xport *xport;
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port),
|
|
|
|
|
&xbridge->xports) {
|
|
|
|
|
if (xport->ofp_port == ofp_port) {
|
|
|
|
|
return xport;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static odp_port_t
|
|
|
|
|
ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
|
|
|
|
|
{
|
|
|
|
|
const struct xport *xport = get_ofp_port(xbridge, ofp_port);
|
|
|
|
|
return xport ? xport->odp_port : ODPP_NONE;
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 18:17:18 +09:00
|
|
|
|
static bool
|
|
|
|
|
odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
|
|
|
|
|
{
|
2014-10-16 15:00:03 -07:00
|
|
|
|
struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
|
|
|
|
|
return xport && xport->may_enable;
|
2013-10-30 18:17:18 +09:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-22 10:47:13 +00:00
|
|
|
|
static struct ofputil_bucket *
|
2013-10-30 18:17:18 +09:00
|
|
|
|
group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
|
|
|
|
|
int depth);
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
|
|
|
|
|
{
|
|
|
|
|
struct group_dpif *group;
|
|
|
|
|
|
2014-05-22 09:24:23 -07:00
|
|
|
|
if (group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group)) {
|
|
|
|
|
struct ofputil_bucket *bucket;
|
2013-10-30 18:17:18 +09:00
|
|
|
|
|
2014-05-22 09:24:23 -07:00
|
|
|
|
bucket = group_first_live_bucket(ctx, group, depth);
|
|
|
|
|
group_dpif_unref(group);
|
2016-05-11 08:46:33 +00:00
|
|
|
|
return bucket != NULL;
|
2014-05-22 09:24:23 -07:00
|
|
|
|
}
|
2013-10-30 18:17:18 +09:00
|
|
|
|
|
2014-05-22 09:24:23 -07:00
|
|
|
|
return false;
|
2013-10-30 18:17:18 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define MAX_LIVENESS_RECURSION 128 /* Arbitrary limit */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
bucket_is_alive(const struct xlate_ctx *ctx,
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct ofputil_bucket *bucket, int depth)
|
2013-10-30 18:17:18 +09:00
|
|
|
|
{
|
|
|
|
|
if (depth >= MAX_LIVENESS_RECURSION) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
|
|
|
|
|
VLOG_WARN_RL(&rl, "bucket chaining exceeded %d links",
|
|
|
|
|
MAX_LIVENESS_RECURSION);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-22 09:35:36 -07:00
|
|
|
|
return (!ofputil_bucket_has_liveness(bucket)
|
|
|
|
|
|| (bucket->watch_port != OFPP_ANY
|
|
|
|
|
&& odp_port_is_alive(ctx, bucket->watch_port))
|
|
|
|
|
|| (bucket->watch_group != OFPG_ANY
|
|
|
|
|
&& group_is_alive(ctx, bucket->watch_group, depth + 1)));
|
2013-10-30 18:17:18 +09:00
|
|
|
|
}
|
|
|
|
|
|
2014-05-22 10:47:13 +00:00
|
|
|
|
static struct ofputil_bucket *
|
2013-10-30 18:17:18 +09:00
|
|
|
|
group_first_live_bucket(const struct xlate_ctx *ctx,
|
|
|
|
|
const struct group_dpif *group, int depth)
|
|
|
|
|
{
|
|
|
|
|
struct ofputil_bucket *bucket;
|
2014-12-15 14:10:38 +01:00
|
|
|
|
const struct ovs_list *buckets;
|
2013-10-30 18:17:18 +09:00
|
|
|
|
|
|
|
|
|
group_dpif_get_buckets(group, &buckets);
|
|
|
|
|
LIST_FOR_EACH (bucket, list_node, buckets) {
|
|
|
|
|
if (bucket_is_alive(ctx, bucket, depth)) {
|
|
|
|
|
return bucket;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-22 10:47:13 +00:00
|
|
|
|
static struct ofputil_bucket *
|
2013-10-30 18:17:19 +09:00
|
|
|
|
group_best_live_bucket(const struct xlate_ctx *ctx,
|
|
|
|
|
const struct group_dpif *group,
|
|
|
|
|
uint32_t basis)
|
|
|
|
|
{
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct ofputil_bucket *best_bucket = NULL;
|
2013-10-30 18:17:19 +09:00
|
|
|
|
uint32_t best_score = 0;
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct ofputil_bucket *bucket;
|
2014-12-15 14:10:38 +01:00
|
|
|
|
const struct ovs_list *buckets;
|
2013-10-30 18:17:19 +09:00
|
|
|
|
|
|
|
|
|
group_dpif_get_buckets(group, &buckets);
|
|
|
|
|
LIST_FOR_EACH (bucket, list_node, buckets) {
|
|
|
|
|
if (bucket_is_alive(ctx, bucket, 0)) {
|
2013-10-30 18:17:20 +09:00
|
|
|
|
uint32_t score = (hash_int(i, basis) & 0xffff) * bucket->weight;
|
2013-10-30 18:17:19 +09:00
|
|
|
|
if (score >= best_score) {
|
|
|
|
|
best_bucket = bucket;
|
|
|
|
|
best_score = score;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return best_bucket;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static bool
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
|
|
|
|
return (bundle->vlan_mode != PORT_VLAN_ACCESS
|
|
|
|
|
&& (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xbundle_includes_vlan(const struct xbundle *xbundle, uint16_t vlan)
|
|
|
|
|
{
|
|
|
|
|
return vlan == xbundle->vlan || xbundle_trunks_vlan(xbundle, vlan);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static mirror_mask_t
|
|
|
|
|
xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle)
|
|
|
|
|
{
|
|
|
|
|
return xbundle != &ofpp_none_bundle
|
|
|
|
|
? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle)
|
|
|
|
|
: 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static mirror_mask_t
|
|
|
|
|
xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
return xbundle != &ofpp_none_bundle
|
|
|
|
|
? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle)
|
|
|
|
|
: 0;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
static mirror_mask_t
|
|
|
|
|
xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
return xbundle != &ofpp_none_bundle
|
|
|
|
|
? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle)
|
|
|
|
|
: 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct xbundle *
|
|
|
|
|
lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port,
|
|
|
|
|
bool warn, struct xport **in_xportp)
|
|
|
|
|
{
|
|
|
|
|
struct xport *xport;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
/* Find the port and bundle for the received packet. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xport = get_ofp_port(xbridge, in_port);
|
|
|
|
|
if (in_xportp) {
|
|
|
|
|
*in_xportp = xport;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport && xport->xbundle) {
|
|
|
|
|
return xport->xbundle;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-04-30 10:10:12 +09:00
|
|
|
|
/* Special-case OFPP_NONE (OF1.0) and OFPP_CONTROLLER (OF1.1+),
|
|
|
|
|
* which a controller may use as the ingress port for traffic that
|
|
|
|
|
* it is sourcing. */
|
|
|
|
|
if (in_port == OFPP_CONTROLLER || in_port == OFPP_NONE) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return &ofpp_none_bundle;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Odd. A few possible reasons here:
|
|
|
|
|
*
|
|
|
|
|
* - We deleted a port but there are still a few packets queued up
|
|
|
|
|
* from it.
|
|
|
|
|
*
|
|
|
|
|
* - Someone externally added a port (e.g. "ovs-dpctl add-if") that
|
|
|
|
|
* we don't know about.
|
|
|
|
|
*
|
|
|
|
|
* - The ofproto client didn't configure the port as part of a bundle.
|
|
|
|
|
* This is particularly likely to happen if a packet was received on the
|
|
|
|
|
* port after it was created, but before the client had a chance to
|
|
|
|
|
* configure its bundle.
|
|
|
|
|
*/
|
|
|
|
|
if (warn) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
|
|
|
|
|
VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
|
2013-06-13 18:38:24 -07:00
|
|
|
|
"port %"PRIu16, xbridge->name, in_port);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* Mirrors the packet represented by 'ctx' to appropriate mirror destinations,
|
|
|
|
|
* given the packet is ingressing or egressing on 'xbundle', which has ingress
|
|
|
|
|
* or egress (as appropriate) mirrors 'mirrors'. */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
|
|
|
|
|
mirror_mask_t mirrors)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* Figure out what VLAN the packet is in (because mirrors can select
|
|
|
|
|
* packets on basis of VLAN). */
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
bool warn = ctx->xin->packet != NULL;
|
|
|
|
|
uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci);
|
|
|
|
|
if (!input_vid_is_valid(vid, xbundle, warn)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
uint16_t vlan = input_vid_to_vlan(xbundle, vid);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
const struct xbridge *xbridge = ctx->xbridge;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
/* Don't mirror to destinations that we've already mirrored to. */
|
|
|
|
|
mirrors &= ~ctx->mirrors;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (!mirrors) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
|
|
|
|
mirror_update_stats(xbridge->mbridge, mirrors,
|
|
|
|
|
ctx->xin->resubmit_stats->n_packets,
|
|
|
|
|
ctx->xin->resubmit_stats->n_bytes);
|
|
|
|
|
}
|
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_MIRROR);
|
|
|
|
|
entry->u.mirror.mbridge = mbridge_ref(xbridge->mbridge);
|
|
|
|
|
entry->u.mirror.mirrors = mirrors;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* 'mirrors' is a bit-mask of candidates for mirroring. Iterate as long as
|
|
|
|
|
* some candidates remain. */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
while (mirrors) {
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
const unsigned long *vlans;
|
2013-06-20 13:00:27 -07:00
|
|
|
|
mirror_mask_t dup_mirrors;
|
|
|
|
|
struct ofbundle *out;
|
|
|
|
|
int out_vlan;
|
|
|
|
|
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* Get the details of the mirror represented by the rightmost 1-bit. */
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
|
|
|
|
|
&vlans, &dup_mirrors, &out, &out_vlan);
|
2013-06-20 13:00:27 -07:00
|
|
|
|
ovs_assert(has_mirror);
|
|
|
|
|
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* If this mirror selects on the basis of VLAN, and it does not select
|
|
|
|
|
* 'vlan', then discard this mirror and go on to the next one. */
|
2013-06-20 13:00:27 -07:00
|
|
|
|
if (vlans) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
if (vlans && !bitmap_is_set(vlans, vlan)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
mirrors = zero_rightmost_1bit(mirrors);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
/* Record the mirror, and the mirrors that output to the same
|
|
|
|
|
* destination, so that we don't mirror to them again. This must be
|
|
|
|
|
* done now to ensure that output_normal(), below, doesn't recursively
|
|
|
|
|
* output to the same mirrors. */
|
2015-07-23 17:08:14 -07:00
|
|
|
|
ctx->mirrors |= dup_mirrors;
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoid output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
|
|
|
|
|
/* Send the packet to the mirror. */
|
2013-06-20 13:00:27 -07:00
|
|
|
|
if (out) {
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
struct xbundle *out_xbundle = xbundle_lookup(xcfg, out);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (out_xbundle) {
|
|
|
|
|
output_normal(ctx, out_xbundle, vlan);
|
|
|
|
|
}
|
2013-06-20 13:00:27 -07:00
|
|
|
|
} else if (vlan != out_vlan
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
&& !eth_addr_is_reserved(ctx->xin->flow.dl_dst)) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbundle *xbundle;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
|
|
|
|
|
if (xbundle_includes_vlan(xbundle, out_vlan)
|
|
|
|
|
&& !xbundle_mirror_out(xbridge, xbundle)) {
|
|
|
|
|
output_normal(ctx, xbundle, out_vlan);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
ofproto-dpif-xlate: Don't consider mirrors used when excluded by VLAN.
Mirrors can be configured to select packets for mirroring on the basis
of multiple criteria: input ports, output ports, and VLANs. A packet P
is to be mirrored if there exists a mirror M such that either:
- P ingresses on an input port selected by M, or
- P egresses on an output port selected by M
AND P is in a VLAN selected by M.
In addition, every mirror has a destination, which can be an output port
or an output VLAN. Either way, if a packet is mirrored to a particular
destination, it is done only once, even if different mirrors both select
a packet and have the same destination.
Since commit 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better
fit flow translation.), these requirements have been implemented
incorrectly: if a packet satisfies one of the bulleted requirements
above for mirror M1, but not the VLAN selection requirement for M1,
then it was not sent to M1's destination, but it was still considered
as having been sent to M1's destination for the purpose of avoiding output
duplication. Thus, if P satisfied *all* of the requirements for a
second mirror M2, if M1 and M2 had the same destination, the packet was
still not mirrored. This commit fixes that problem.
(The issue only occurred if M1 happened to have a smaller index than
M2 in OVS's internal data structures. That's just a matter of luck.)
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: http://openvswitch.org/pipermail/dev/2016-January/064531.html
Fixes: 7efbc3b7c4006c (ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.)
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-05 19:16:01 -08:00
|
|
|
|
|
|
|
|
|
/* output_normal() could have recursively output (to different
|
|
|
|
|
* mirrors), so make sure that we don't send duplicates. */
|
|
|
|
|
mirrors &= ~ctx->mirrors;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
static void
|
|
|
|
|
mirror_ingress_packet(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
if (mbridge_has_mirrors(ctx->xbridge->mbridge)) {
|
|
|
|
|
bool warn = ctx->xin->packet != NULL;
|
|
|
|
|
struct xbundle *xbundle = lookup_input_bundle(
|
|
|
|
|
ctx->xbridge, ctx->xin->flow.in_port.ofp_port, warn, NULL);
|
|
|
|
|
if (xbundle) {
|
|
|
|
|
mirror_packet(ctx, xbundle,
|
|
|
|
|
xbundle_mirror_src(ctx->xbridge, xbundle));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* Given 'vid', the VID obtained from the 802.1Q header that was received as
|
2013-06-13 18:38:24 -07:00
|
|
|
|
* part of a packet (specify 0 if there was no 802.1Q header), and 'in_xbundle',
|
2013-06-11 13:32:30 -07:00
|
|
|
|
* the bundle on which the packet was received, returns the VLAN to which the
|
|
|
|
|
* packet belongs.
|
|
|
|
|
*
|
|
|
|
|
* Both 'vid' and the return value are in the range 0...4095. */
|
|
|
|
|
static uint16_t
|
2013-06-13 18:38:24 -07:00
|
|
|
|
input_vid_to_vlan(const struct xbundle *in_xbundle, uint16_t vid)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
switch (in_xbundle->vlan_mode) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case PORT_VLAN_ACCESS:
|
2013-06-13 18:38:24 -07:00
|
|
|
|
return in_xbundle->vlan;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case PORT_VLAN_TRUNK:
|
|
|
|
|
return vid;
|
|
|
|
|
|
|
|
|
|
case PORT_VLAN_NATIVE_UNTAGGED:
|
|
|
|
|
case PORT_VLAN_NATIVE_TAGGED:
|
2013-06-13 18:38:24 -07:00
|
|
|
|
return vid ? vid : in_xbundle->vlan;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
/* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'.
|
2013-06-11 13:32:30 -07:00
|
|
|
|
* If so, returns true. Otherwise, returns false and, if 'warn' is true, logs
|
|
|
|
|
* a warning.
|
|
|
|
|
*
|
|
|
|
|
* 'vid' should be the VID obtained from the 802.1Q header that was received as
|
|
|
|
|
* part of a packet (specify 0 if there was no 802.1Q header), in the range
|
|
|
|
|
* 0...4095. */
|
|
|
|
|
static bool
|
2013-06-13 18:38:24 -07:00
|
|
|
|
input_vid_is_valid(uint16_t vid, struct xbundle *in_xbundle, bool warn)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
|
|
|
|
/* Allow any VID on the OFPP_NONE port. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (in_xbundle == &ofpp_none_bundle) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
switch (in_xbundle->vlan_mode) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case PORT_VLAN_ACCESS:
|
|
|
|
|
if (vid) {
|
|
|
|
|
if (warn) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" tagged "
|
2013-06-11 13:32:30 -07:00
|
|
|
|
"packet received on port %s configured as VLAN "
|
2013-06-13 18:38:24 -07:00
|
|
|
|
"%"PRIu16" access port", vid, in_xbundle->name,
|
|
|
|
|
in_xbundle->vlan);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
case PORT_VLAN_NATIVE_UNTAGGED:
|
|
|
|
|
case PORT_VLAN_NATIVE_TAGGED:
|
|
|
|
|
if (!vid) {
|
|
|
|
|
/* Port must always carry its native VLAN. */
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
/* Fall through. */
|
|
|
|
|
case PORT_VLAN_TRUNK:
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!xbundle_includes_vlan(in_xbundle, vid)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (warn) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" packet "
|
2013-06-11 13:32:30 -07:00
|
|
|
|
"received on port %s not configured for trunking "
|
2013-06-13 18:38:24 -07:00
|
|
|
|
"VLAN %"PRIu16, vid, in_xbundle->name, vid);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Given 'vlan', the VLAN that a packet belongs to, and
|
2013-06-13 18:38:24 -07:00
|
|
|
|
* 'out_xbundle', a bundle on which the packet is to be output, returns the VID
|
2013-06-11 13:32:30 -07:00
|
|
|
|
* that should be included in the 802.1Q header. (If the return value is 0,
|
|
|
|
|
* then the 802.1Q header should only be included in the packet if there is a
|
|
|
|
|
* nonzero PCP.)
|
|
|
|
|
*
|
|
|
|
|
* Both 'vlan' and the return value are in the range 0...4095. */
|
|
|
|
|
static uint16_t
|
2013-06-13 18:38:24 -07:00
|
|
|
|
output_vlan_to_vid(const struct xbundle *out_xbundle, uint16_t vlan)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
switch (out_xbundle->vlan_mode) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case PORT_VLAN_ACCESS:
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
case PORT_VLAN_TRUNK:
|
|
|
|
|
case PORT_VLAN_NATIVE_TAGGED:
|
|
|
|
|
return vlan;
|
|
|
|
|
|
|
|
|
|
case PORT_VLAN_NATIVE_UNTAGGED:
|
2013-06-13 18:38:24 -07:00
|
|
|
|
return vlan == out_xbundle->vlan ? 0 : vlan;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2013-06-13 18:38:24 -07:00
|
|
|
|
output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint16_t vlan)
|
|
|
|
|
{
|
2013-06-12 14:37:18 -07:00
|
|
|
|
ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint16_t vid;
|
|
|
|
|
ovs_be16 tci, old_tci;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xport *xport;
|
2015-03-12 13:02:07 -07:00
|
|
|
|
struct xlate_bond_recirc xr;
|
|
|
|
|
bool use_recirc = false;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
vid = output_vlan_to_vid(out_xbundle, vlan);
|
2016-03-25 14:10:22 -07:00
|
|
|
|
if (ovs_list_is_empty(&out_xbundle->xports)) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
/* Partially configured bundle with no slaves. Drop the packet. */
|
|
|
|
|
return;
|
|
|
|
|
} else if (!out_xbundle->bond) {
|
2016-03-25 14:10:22 -07:00
|
|
|
|
xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport,
|
2013-06-13 18:38:24 -07:00
|
|
|
|
bundle_node);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct ofport_dpif *ofport;
|
2014-03-05 15:27:31 -08:00
|
|
|
|
|
2015-06-30 16:43:03 -07:00
|
|
|
|
if (ctx->xbridge->support.odp.recirc) {
|
2015-03-12 13:02:07 -07:00
|
|
|
|
use_recirc = bond_may_recirc(
|
|
|
|
|
out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
|
2014-03-05 15:27:31 -08:00
|
|
|
|
|
2015-03-12 13:02:07 -07:00
|
|
|
|
if (use_recirc) {
|
2014-03-05 15:27:31 -08:00
|
|
|
|
/* Only TCP mode uses recirculation. */
|
2015-03-12 13:02:07 -07:00
|
|
|
|
xr.hash_alg = OVS_HASH_ALG_L4;
|
2014-03-05 15:27:31 -08:00
|
|
|
|
bond_update_post_recirc_rules(out_xbundle->bond, false);
|
2014-04-07 21:49:07 -07:00
|
|
|
|
|
|
|
|
|
/* Recirculation does not require unmasking hash fields. */
|
|
|
|
|
wc = NULL;
|
2014-03-05 15:27:31 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
2014-04-07 21:49:07 -07:00
|
|
|
|
ofport = bond_choose_output_slave(out_xbundle->bond,
|
|
|
|
|
&ctx->xin->flow, wc, vid);
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport = xport_lookup(xcfg, ofport);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
|
|
|
|
|
if (!xport) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* No slaves enabled, so drop packet. */
|
|
|
|
|
return;
|
|
|
|
|
}
|
2013-11-12 18:18:01 -08:00
|
|
|
|
|
2015-03-12 13:02:07 -07:00
|
|
|
|
/* If use_recirc is set, the main thread will handle stats
|
2014-04-10 16:00:28 +12:00
|
|
|
|
* accounting for this bond. */
|
2015-03-12 13:02:07 -07:00
|
|
|
|
if (!use_recirc) {
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
|
|
|
|
bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
|
|
|
|
|
ctx->xin->resubmit_stats->n_bytes);
|
|
|
|
|
}
|
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
struct flow *flow;
|
|
|
|
|
|
|
|
|
|
flow = &ctx->xin->flow;
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_BOND);
|
|
|
|
|
entry->u.bond.bond = bond_ref(out_xbundle->bond);
|
|
|
|
|
entry->u.bond.flow = xmemdup(flow, sizeof *flow);
|
|
|
|
|
entry->u.bond.vid = vid;
|
|
|
|
|
}
|
2013-11-12 18:18:01 -08:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-12 14:37:18 -07:00
|
|
|
|
old_tci = *flow_tci;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
tci = htons(vid);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (tci || out_xbundle->use_priority_tags) {
|
2013-06-12 14:37:18 -07:00
|
|
|
|
tci |= *flow_tci & htons(VLAN_PCP_MASK);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (tci) {
|
|
|
|
|
tci |= htons(VLAN_CFI);
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-12 14:37:18 -07:00
|
|
|
|
*flow_tci = tci;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(ctx, xport->ofp_port, use_recirc ? &xr : NULL);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
*flow_tci = old_tci;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
|
|
|
|
|
* migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to
|
|
|
|
|
* indicate this; newer upstream kernels use gratuitous ARP requests. */
|
|
|
|
|
static bool
|
|
|
|
|
is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
|
|
|
|
|
{
|
|
|
|
|
if (flow->dl_type != htons(ETH_TYPE_ARP)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
|
|
|
|
|
if (!eth_addr_is_broadcast(flow->dl_dst)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
|
|
|
|
|
if (flow->nw_proto == ARP_OP_REPLY) {
|
|
|
|
|
return true;
|
|
|
|
|
} else if (flow->nw_proto == ARP_OP_REQUEST) {
|
|
|
|
|
memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
|
|
|
|
|
memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
|
|
|
|
|
|
|
|
|
|
return flow->nw_src == flow->nw_dst;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-11 18:34:14 -03:00
|
|
|
|
/* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
|
|
|
|
|
* dropped. Returns true if they may be forwarded, false if they should be
|
|
|
|
|
* dropped.
|
|
|
|
|
*
|
|
|
|
|
* 'in_port' must be the xport that corresponds to flow->in_port.
|
|
|
|
|
* 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
|
|
|
|
|
*
|
|
|
|
|
* 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
|
|
|
|
|
* returned by input_vid_to_vlan(). It must be a valid VLAN for 'in_port', as
|
|
|
|
|
* checked by input_vid_is_valid().
|
|
|
|
|
*
|
|
|
|
|
* May also add tags to '*tags', although the current implementation only does
|
|
|
|
|
* so in one special case.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
|
|
|
|
|
uint16_t vlan)
|
|
|
|
|
{
|
|
|
|
|
struct xbundle *in_xbundle = in_port->xbundle;
|
|
|
|
|
const struct xbridge *xbridge = ctx->xbridge;
|
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
|
|
|
|
|
|
|
|
|
/* Drop frames for reserved multicast addresses
|
|
|
|
|
* only if forward_bpdu option is absent. */
|
|
|
|
|
if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
|
|
|
|
|
xlate_report(ctx, "packet has reserved destination MAC, dropping");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (in_xbundle->bond) {
|
|
|
|
|
struct mac_entry *mac;
|
|
|
|
|
|
|
|
|
|
switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
|
|
|
|
|
flow->dl_dst)) {
|
|
|
|
|
case BV_ACCEPT:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case BV_DROP:
|
|
|
|
|
xlate_report(ctx, "bonding refused admissibility, dropping");
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
case BV_DROP_IF_MOVED:
|
|
|
|
|
ovs_rwlock_rdlock(&xbridge->ml->rwlock);
|
|
|
|
|
mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups to read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
if (mac
|
|
|
|
|
&& mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
|
2015-07-31 13:15:52 -07:00
|
|
|
|
&& (!is_gratuitous_arp(flow, ctx->wc)
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups to read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
|| mac_entry_is_grat_arp_locked(mac))) {
|
2014-04-11 18:34:14 -03:00
|
|
|
|
ovs_rwlock_unlock(&xbridge->ml->rwlock);
|
|
|
|
|
xlate_report(ctx, "SLB bond thinks this packet looped back, "
|
|
|
|
|
"dropping");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
ovs_rwlock_unlock(&xbridge->ml->rwlock);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-06 20:35:29 -07:00
|
|
|
|
/* Checks whether a MAC learning update is necessary for MAC learning table
|
|
|
|
|
* 'ml' given that a packet matching 'flow' was received on 'in_xbundle' in
|
|
|
|
|
* 'vlan'.
|
|
|
|
|
*
|
|
|
|
|
* Most packets processed through the MAC learning table do not actually
|
|
|
|
|
* change it in any way. This function requires only a read lock on the MAC
|
|
|
|
|
* learning table, so it is much cheaper in this common case.
|
|
|
|
|
*
|
|
|
|
|
* Keep the code here synchronized with that in update_learning_table__()
|
|
|
|
|
* below. */
|
|
|
|
|
static bool
|
|
|
|
|
is_mac_learning_update_needed(const struct mac_learning *ml,
|
|
|
|
|
const struct flow *flow,
|
|
|
|
|
struct flow_wildcards *wc,
|
|
|
|
|
int vlan, struct xbundle *in_xbundle)
|
2013-11-12 18:18:01 -08:00
|
|
|
|
OVS_REQ_RDLOCK(ml->rwlock)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
|
|
|
|
struct mac_entry *mac;
|
|
|
|
|
|
2013-08-06 20:35:29 -07:00
|
|
|
|
if (!mac_learning_may_learn(ml, flow->dl_src, vlan)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mac = mac_learning_lookup(ml, flow->dl_src, vlan);
|
|
|
|
|
if (!mac || mac_entry_age(ml, mac)) {
|
|
|
|
|
return true;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-08-06 20:35:29 -07:00
|
|
|
|
if (is_gratuitous_arp(flow, wc)) {
|
|
|
|
|
/* We don't want to learn from gratuitous ARP packets that are
|
|
|
|
|
* reflected back over bond slaves so we lock the learning table. */
|
|
|
|
|
if (!in_xbundle->bond) {
|
|
|
|
|
return true;
|
|
|
|
|
} else if (mac_entry_is_grat_arp_locked(mac)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups to read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
return mac_entry_get_port(ml, mac) != in_xbundle->ofbundle;
|
2013-08-06 20:35:29 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Updates MAC learning table 'ml' given that a packet matching 'flow' was
|
|
|
|
|
* received on 'in_xbundle' in 'vlan'.
|
|
|
|
|
*
|
|
|
|
|
* This code repeats all the checks in is_mac_learning_update_needed() because
|
|
|
|
|
* the lock was released between there and here and thus the MAC learning state
|
|
|
|
|
* could have changed.
|
|
|
|
|
*
|
|
|
|
|
* Keep the code here synchronized with that in is_mac_learning_update_needed()
|
|
|
|
|
* above. */
|
|
|
|
|
static void
|
|
|
|
|
update_learning_table__(const struct xbridge *xbridge,
|
|
|
|
|
const struct flow *flow, struct flow_wildcards *wc,
|
|
|
|
|
int vlan, struct xbundle *in_xbundle)
|
2013-11-12 18:18:01 -08:00
|
|
|
|
OVS_REQ_WRLOCK(xbridge->ml->rwlock)
|
2013-08-06 20:35:29 -07:00
|
|
|
|
{
|
|
|
|
|
struct mac_entry *mac;
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
|
2013-08-06 20:35:29 -07:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (is_gratuitous_arp(flow, wc)) {
|
|
|
|
|
/* We don't want to learn from gratuitous ARP packets that are
|
|
|
|
|
* reflected back over bond slaves so we lock the learning table. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!in_xbundle->bond) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
mac_entry_set_grat_arp_lock(mac);
|
|
|
|
|
} else if (mac_entry_is_grat_arp_locked(mac)) {
|
2013-08-06 20:35:29 -07:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
if (mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* The log messages here could actually be useful in debugging,
|
|
|
|
|
* so keep the rate limit relatively high. */
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
|
2013-08-06 20:35:29 -07:00
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
|
|
|
|
|
"on port %s in VLAN %d",
|
2013-06-13 18:38:24 -07:00
|
|
|
|
xbridge->name, ETH_ADDR_ARGS(flow->dl_src),
|
|
|
|
|
in_xbundle->name, vlan);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups to read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
mac_entry_set_port(xbridge->ml, mac, in_xbundle->ofbundle);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2013-08-06 20:35:29 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
update_learning_table(const struct xbridge *xbridge,
|
|
|
|
|
const struct flow *flow, struct flow_wildcards *wc,
|
|
|
|
|
int vlan, struct xbundle *in_xbundle)
|
|
|
|
|
{
|
|
|
|
|
bool need_update;
|
|
|
|
|
|
|
|
|
|
/* Don't learn the OFPP_NONE port. */
|
|
|
|
|
if (in_xbundle == &ofpp_none_bundle) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* First try the common case: no change to MAC learning table. */
|
|
|
|
|
ovs_rwlock_rdlock(&xbridge->ml->rwlock);
|
|
|
|
|
need_update = is_mac_learning_update_needed(xbridge->ml, flow, wc, vlan,
|
|
|
|
|
in_xbundle);
|
2013-07-22 11:11:54 -07:00
|
|
|
|
ovs_rwlock_unlock(&xbridge->ml->rwlock);
|
2013-08-06 20:35:29 -07:00
|
|
|
|
|
|
|
|
|
if (need_update) {
|
|
|
|
|
/* Slow path: MAC learning table might need an update. */
|
|
|
|
|
ovs_rwlock_wrlock(&xbridge->ml->rwlock);
|
|
|
|
|
update_learning_table__(xbridge, flow, wc, vlan, in_xbundle);
|
|
|
|
|
ovs_rwlock_unlock(&xbridge->ml->rwlock);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-06-18 22:14:34 -03:00
|
|
|
|
/* Updates multicast snooping table 'ms' given that a packet matching 'flow'
|
|
|
|
|
* was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
|
|
|
|
|
static void
|
2015-07-01 16:12:12 -03:00
|
|
|
|
update_mcast_snooping_table4__(const struct xbridge *xbridge,
|
|
|
|
|
const struct flow *flow,
|
|
|
|
|
struct mcast_snooping *ms, int vlan,
|
|
|
|
|
struct xbundle *in_xbundle,
|
|
|
|
|
const struct dp_packet *packet)
|
2014-06-18 22:14:34 -03:00
|
|
|
|
OVS_REQ_WRLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
|
2015-06-17 14:12:20 -03:00
|
|
|
|
int count;
|
2015-07-01 16:12:12 -03:00
|
|
|
|
ovs_be32 ip4 = flow->igmp_group_ip4;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
|
|
|
|
|
switch (ntohs(flow->tp_src)) {
|
|
|
|
|
case IGMP_HOST_MEMBERSHIP_REPORT:
|
|
|
|
|
case IGMPV2_HOST_MEMBERSHIP_REPORT:
|
2015-07-01 16:12:11 -03:00
|
|
|
|
if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
|
|
|
|
|
IP_FMT" is on port %s in VLAN %d",
|
|
|
|
|
xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case IGMP_HOST_LEAVE_MESSAGE:
|
2015-07-01 16:12:11 -03:00
|
|
|
|
if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
|
|
|
|
|
IP_FMT" is on port %s in VLAN %d",
|
|
|
|
|
xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case IGMP_HOST_MEMBERSHIP_QUERY:
|
|
|
|
|
if (flow->nw_src && mcast_snooping_add_mrouter(ms, vlan,
|
|
|
|
|
in_xbundle->ofbundle)) {
|
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query from "
|
|
|
|
|
IP_FMT" is on port %s in VLAN %d",
|
|
|
|
|
xbridge->name, IP_ARGS(flow->nw_src),
|
|
|
|
|
in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2015-06-17 14:12:20 -03:00
|
|
|
|
case IGMPV3_HOST_MEMBERSHIP_REPORT:
|
|
|
|
|
if ((count = mcast_snooping_add_report(ms, packet, vlan,
|
|
|
|
|
in_xbundle->ofbundle))) {
|
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
|
|
|
|
|
"addresses on port %s in VLAN %d",
|
|
|
|
|
xbridge->name, count, in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-01 16:12:12 -03:00
|
|
|
|
static void
|
|
|
|
|
update_mcast_snooping_table6__(const struct xbridge *xbridge,
|
|
|
|
|
const struct flow *flow,
|
|
|
|
|
struct mcast_snooping *ms, int vlan,
|
|
|
|
|
struct xbundle *in_xbundle,
|
|
|
|
|
const struct dp_packet *packet)
|
|
|
|
|
OVS_REQ_WRLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
|
|
|
|
|
int count;
|
|
|
|
|
|
|
|
|
|
switch (ntohs(flow->tp_src)) {
|
|
|
|
|
case MLD_QUERY:
|
|
|
|
|
if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
|
|
|
|
|
&& mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
|
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
|
|
|
|
|
"in VLAN %d",
|
|
|
|
|
xbridge->name, in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case MLD_REPORT:
|
|
|
|
|
case MLD_DONE:
|
|
|
|
|
case MLD2_REPORT:
|
|
|
|
|
count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
|
|
|
|
|
if (count) {
|
|
|
|
|
VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
|
|
|
|
|
"addresses on port %s in VLAN %d",
|
|
|
|
|
xbridge->name, count, in_xbundle->name, vlan);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-18 22:14:34 -03:00
|
|
|
|
/* Updates multicast snooping table 'ms' given that a packet matching 'flow'
|
|
|
|
|
* was received on 'in_xbundle' in 'vlan'. */
|
|
|
|
|
static void
|
|
|
|
|
update_mcast_snooping_table(const struct xbridge *xbridge,
|
|
|
|
|
const struct flow *flow, int vlan,
|
2015-06-17 14:12:20 -03:00
|
|
|
|
struct xbundle *in_xbundle,
|
|
|
|
|
const struct dp_packet *packet)
|
2014-06-18 22:14:34 -03:00
|
|
|
|
{
|
|
|
|
|
struct mcast_snooping *ms = xbridge->ms;
|
|
|
|
|
struct xlate_cfg *xcfg;
|
|
|
|
|
struct xbundle *mcast_xbundle;
|
2014-12-11 09:38:18 -02:00
|
|
|
|
struct mcast_port_bundle *fport;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
|
|
|
|
|
/* Don't learn the OFPP_NONE port. */
|
|
|
|
|
if (in_xbundle == &ofpp_none_bundle) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Don't learn from flood ports */
|
|
|
|
|
mcast_xbundle = NULL;
|
|
|
|
|
ovs_rwlock_wrlock(&ms->rwlock);
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2014-12-11 09:38:18 -02:00
|
|
|
|
LIST_FOR_EACH(fport, node, &ms->fport_list) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
mcast_xbundle = xbundle_lookup(xcfg, fport->port);
|
|
|
|
|
if (mcast_xbundle == in_xbundle) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
|
2015-07-01 16:12:12 -03:00
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_IP)) {
|
|
|
|
|
update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
|
|
|
|
|
in_xbundle, packet);
|
|
|
|
|
} else {
|
|
|
|
|
update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
|
|
|
|
|
in_xbundle, packet);
|
|
|
|
|
}
|
2014-06-18 22:14:34 -03:00
|
|
|
|
}
|
|
|
|
|
ovs_rwlock_unlock(&ms->rwlock);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* send the packet to ports having the multicast group learned */
|
|
|
|
|
static void
|
|
|
|
|
xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
|
|
|
|
|
struct mcast_snooping *ms OVS_UNUSED,
|
|
|
|
|
struct mcast_group *grp,
|
|
|
|
|
struct xbundle *in_xbundle, uint16_t vlan)
|
|
|
|
|
OVS_REQ_RDLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cfg *xcfg;
|
|
|
|
|
struct mcast_group_bundle *b;
|
|
|
|
|
struct xbundle *mcast_xbundle;
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
|
|
|
|
|
mcast_xbundle = xbundle_lookup(xcfg, b->port);
|
|
|
|
|
if (mcast_xbundle && mcast_xbundle != in_xbundle) {
|
|
|
|
|
xlate_report(ctx, "forwarding to mcast group port");
|
|
|
|
|
output_normal(ctx, mcast_xbundle, vlan);
|
|
|
|
|
} else if (!mcast_xbundle) {
|
|
|
|
|
xlate_report(ctx, "mcast group port is unknown, dropping");
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "mcast group port is input port, dropping");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* send the packet to ports connected to multicast routers */
|
|
|
|
|
static void
|
|
|
|
|
xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
|
|
|
|
|
struct mcast_snooping *ms,
|
|
|
|
|
struct xbundle *in_xbundle, uint16_t vlan)
|
|
|
|
|
OVS_REQ_RDLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cfg *xcfg;
|
|
|
|
|
struct mcast_mrouter_bundle *mrouter;
|
|
|
|
|
struct xbundle *mcast_xbundle;
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
LIST_FOR_EACH(mrouter, mrouter_node, &ms->mrouter_lru) {
|
|
|
|
|
mcast_xbundle = xbundle_lookup(xcfg, mrouter->port);
|
|
|
|
|
if (mcast_xbundle && mcast_xbundle != in_xbundle) {
|
|
|
|
|
xlate_report(ctx, "forwarding to mcast router port");
|
|
|
|
|
output_normal(ctx, mcast_xbundle, vlan);
|
|
|
|
|
} else if (!mcast_xbundle) {
|
|
|
|
|
xlate_report(ctx, "mcast router port is unknown, dropping");
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "mcast router port is input port, dropping");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* send the packet to ports flagged to be flooded */
|
|
|
|
|
static void
|
|
|
|
|
xlate_normal_mcast_send_fports(struct xlate_ctx *ctx,
|
|
|
|
|
struct mcast_snooping *ms,
|
|
|
|
|
struct xbundle *in_xbundle, uint16_t vlan)
|
|
|
|
|
OVS_REQ_RDLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cfg *xcfg;
|
2014-12-11 09:38:18 -02:00
|
|
|
|
struct mcast_port_bundle *fport;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
struct xbundle *mcast_xbundle;
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2014-12-11 09:38:18 -02:00
|
|
|
|
LIST_FOR_EACH(fport, node, &ms->fport_list) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
mcast_xbundle = xbundle_lookup(xcfg, fport->port);
|
|
|
|
|
if (mcast_xbundle && mcast_xbundle != in_xbundle) {
|
|
|
|
|
xlate_report(ctx, "forwarding to mcast flood port");
|
|
|
|
|
output_normal(ctx, mcast_xbundle, vlan);
|
|
|
|
|
} else if (!mcast_xbundle) {
|
|
|
|
|
xlate_report(ctx, "mcast flood port is unknown, dropping");
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "mcast flood port is input port, dropping");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-11 09:38:19 -02:00
|
|
|
|
/* forward the Reports to configured ports */
|
|
|
|
|
static void
|
|
|
|
|
xlate_normal_mcast_send_rports(struct xlate_ctx *ctx,
|
|
|
|
|
struct mcast_snooping *ms,
|
|
|
|
|
struct xbundle *in_xbundle, uint16_t vlan)
|
|
|
|
|
OVS_REQ_RDLOCK(ms->rwlock)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cfg *xcfg;
|
|
|
|
|
struct mcast_port_bundle *rport;
|
|
|
|
|
struct xbundle *mcast_xbundle;
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
LIST_FOR_EACH(rport, node, &ms->rport_list) {
|
|
|
|
|
mcast_xbundle = xbundle_lookup(xcfg, rport->port);
|
|
|
|
|
if (mcast_xbundle && mcast_xbundle != in_xbundle) {
|
|
|
|
|
xlate_report(ctx, "forwarding Report to mcast flagged port");
|
|
|
|
|
output_normal(ctx, mcast_xbundle, vlan);
|
|
|
|
|
} else if (!mcast_xbundle) {
|
|
|
|
|
xlate_report(ctx, "mcast port is unknown, dropping the Report");
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "mcast port is input port, dropping the Report");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-11 18:34:15 -03:00
|
|
|
|
static void
|
|
|
|
|
xlate_normal_flood(struct xlate_ctx *ctx, struct xbundle *in_xbundle,
|
|
|
|
|
uint16_t vlan)
|
|
|
|
|
{
|
|
|
|
|
struct xbundle *xbundle;
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
|
|
|
|
|
if (xbundle != in_xbundle
|
|
|
|
|
&& xbundle_includes_vlan(xbundle, vlan)
|
|
|
|
|
&& xbundle->floodable
|
|
|
|
|
&& !xbundle_mirror_out(ctx->xbridge, xbundle)) {
|
|
|
|
|
output_normal(ctx, xbundle, vlan);
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface = NF_OUT_FLOOD;
|
2014-04-11 18:34:15 -03:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-08 10:34:10 -07:00
|
|
|
|
static bool
|
|
|
|
|
is_ip_local_multicast(const struct flow *flow, struct flow_wildcards *wc)
|
|
|
|
|
{
|
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_IP)) {
|
|
|
|
|
memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
|
|
|
|
|
return ip_is_local_multicast(flow->nw_dst);
|
|
|
|
|
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
|
|
|
|
|
memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
|
|
|
|
|
return ipv6_is_all_hosts(&flow->ipv6_dst);
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_normal(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
struct xbundle *in_xbundle;
|
|
|
|
|
struct xport *in_port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
struct mac_entry *mac;
|
2013-09-03 17:34:00 -07:00
|
|
|
|
void *mac_port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint16_t vlan;
|
|
|
|
|
uint16_t vid;
|
|
|
|
|
|
2013-06-12 14:37:18 -07:00
|
|
|
|
memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
|
|
|
|
|
memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
|
2013-06-18 23:55:47 -07:00
|
|
|
|
wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
in_xbundle = lookup_input_bundle(ctx->xbridge, flow->in_port.ofp_port,
|
|
|
|
|
ctx->xin->packet != NULL, &in_port);
|
|
|
|
|
if (!in_xbundle) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "no input bundle, dropping");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Drop malformed frames. */
|
2013-06-12 14:37:18 -07:00
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
|
|
|
|
|
!(flow->vlan_tci & htons(VLAN_CFI))) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (ctx->xin->packet != NULL) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
|
|
|
|
|
"VLAN tag received on port %s",
|
2013-06-13 18:38:24 -07:00
|
|
|
|
ctx->xbridge->name, in_xbundle->name);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
xlate_report(ctx, "partial VLAN tag, dropping");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Drop frames on bundles reserved for mirroring. */
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (ctx->xin->packet != NULL) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
|
|
|
|
|
"%s, which is reserved exclusively for mirroring",
|
2013-06-13 18:38:24 -07:00
|
|
|
|
ctx->xbridge->name, in_xbundle->name);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
xlate_report(ctx, "input port is mirror output port, dropping");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check VLAN. */
|
2013-06-12 14:37:18 -07:00
|
|
|
|
vid = vlan_tci_to_vid(flow->vlan_tci);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
|
|
|
|
|
return;
|
|
|
|
|
}
|
2013-06-13 18:38:24 -07:00
|
|
|
|
vlan = input_vid_to_vlan(in_xbundle, vid);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
/* Check other admissibility requirements. */
|
|
|
|
|
if (in_port && !is_admissible(ctx, in_port, vlan)) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Learn source MAC. */
|
|
|
|
|
if (ctx->xin->may_learn) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
/* Save enough info to update mac learning table later. */
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
|
2014-05-21 20:45:24 -07:00
|
|
|
|
entry->u.normal.ofproto = ctx->xbridge->ofproto;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
entry->u.normal.flow = xmemdup(flow, sizeof *flow);
|
|
|
|
|
entry->u.normal.vlan = vlan;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
/* Determine output bundle. */
|
2014-06-18 22:14:34 -03:00
|
|
|
|
if (mcast_snooping_enabled(ctx->xbridge->ms)
|
|
|
|
|
&& !eth_addr_is_broadcast(flow->dl_dst)
|
|
|
|
|
&& eth_addr_is_multicast(flow->dl_dst)
|
2015-07-01 16:12:12 -03:00
|
|
|
|
&& is_ip_any(flow)) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
struct mcast_snooping *ms = ctx->xbridge->ms;
|
2015-07-01 16:12:12 -03:00
|
|
|
|
struct mcast_group *grp = NULL;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
|
2016-05-08 10:34:10 -07:00
|
|
|
|
if (is_igmp(flow, wc)) {
|
|
|
|
|
memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
|
2015-06-17 14:12:19 -03:00
|
|
|
|
if (mcast_snooping_is_membership(flow->tp_src) ||
|
|
|
|
|
mcast_snooping_is_query(flow->tp_src)) {
|
2016-02-17 12:43:56 -02:00
|
|
|
|
if (ctx->xin->may_learn && ctx->xin->packet) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
|
2015-06-17 14:12:20 -03:00
|
|
|
|
in_xbundle, ctx->xin->packet);
|
2015-06-17 14:12:19 -03:00
|
|
|
|
}
|
|
|
|
|
/*
|
|
|
|
|
* IGMP packets need to take the slow path, in order to be
|
|
|
|
|
* processed for mdb updates. That will prevent expires
|
|
|
|
|
* firing off even after hosts have sent reports.
|
|
|
|
|
*/
|
|
|
|
|
ctx->xout->slow |= SLOW_ACTION;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
}
|
2013-09-03 17:34:00 -07:00
|
|
|
|
|
2014-06-18 22:14:34 -03:00
|
|
|
|
if (mcast_snooping_is_membership(flow->tp_src)) {
|
|
|
|
|
ovs_rwlock_rdlock(&ms->rwlock);
|
|
|
|
|
xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
|
2014-12-11 09:38:19 -02:00
|
|
|
|
/* RFC4541: section 2.1.1, item 1: A snooping switch should
|
|
|
|
|
* forward IGMP Membership Reports only to those ports where
|
|
|
|
|
* multicast routers are attached. Alternatively stated: a
|
|
|
|
|
* snooping switch should not forward IGMP Membership Reports
|
|
|
|
|
* to ports on which only hosts are attached.
|
|
|
|
|
* An administrative control may be provided to override this
|
|
|
|
|
* restriction, allowing the report messages to be flooded to
|
|
|
|
|
* other ports. */
|
|
|
|
|
xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
|
2014-06-18 22:14:34 -03:00
|
|
|
|
ovs_rwlock_unlock(&ms->rwlock);
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "multicast traffic, flooding");
|
|
|
|
|
xlate_normal_flood(ctx, in_xbundle, vlan);
|
|
|
|
|
}
|
|
|
|
|
return;
|
2016-05-08 10:34:10 -07:00
|
|
|
|
} else if (is_mld(flow, wc)) {
|
2015-07-01 16:12:12 -03:00
|
|
|
|
ctx->xout->slow |= SLOW_ACTION;
|
2016-02-17 12:43:56 -02:00
|
|
|
|
if (ctx->xin->may_learn && ctx->xin->packet) {
|
2015-07-01 16:12:12 -03:00
|
|
|
|
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
|
|
|
|
|
in_xbundle, ctx->xin->packet);
|
|
|
|
|
}
|
2016-05-08 10:34:10 -07:00
|
|
|
|
if (is_mld_report(flow, wc)) {
|
2015-07-01 16:12:12 -03:00
|
|
|
|
ovs_rwlock_rdlock(&ms->rwlock);
|
|
|
|
|
xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
|
|
|
|
|
xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
|
|
|
|
|
ovs_rwlock_unlock(&ms->rwlock);
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "MLD query, flooding");
|
|
|
|
|
xlate_normal_flood(ctx, in_xbundle, vlan);
|
|
|
|
|
}
|
2014-06-18 22:14:34 -03:00
|
|
|
|
} else {
|
2016-05-08 10:34:10 -07:00
|
|
|
|
if (is_ip_local_multicast(flow, wc)) {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
/* RFC4541: section 2.1.2, item 2: Packets with a dst IP
|
|
|
|
|
* address in the 224.0.0.x range which are not IGMP must
|
|
|
|
|
* be forwarded on all ports */
|
|
|
|
|
xlate_report(ctx, "RFC4541: section 2.1.2, item 2, flooding");
|
|
|
|
|
xlate_normal_flood(ctx, in_xbundle, vlan);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* forwarding to group base ports */
|
|
|
|
|
ovs_rwlock_rdlock(&ms->rwlock);
|
2015-07-01 16:12:12 -03:00
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_IP)) {
|
|
|
|
|
grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
|
|
|
|
|
} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
|
|
|
|
|
grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
|
|
|
|
|
}
|
2014-06-18 22:14:34 -03:00
|
|
|
|
if (grp) {
|
|
|
|
|
xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
|
|
|
|
|
xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
|
|
|
|
|
xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
if (mcast_snooping_flood_unreg(ms)) {
|
|
|
|
|
xlate_report(ctx, "unregistered multicast, flooding");
|
|
|
|
|
xlate_normal_flood(ctx, in_xbundle, vlan);
|
|
|
|
|
} else {
|
|
|
|
|
xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
|
|
|
|
|
xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-06-18 22:14:34 -03:00
|
|
|
|
ovs_rwlock_unlock(&ms->rwlock);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
2014-06-18 22:14:34 -03:00
|
|
|
|
ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
|
|
|
|
|
mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
|
mac-learning: Implement per-port MAC learning fairness.
In "MAC flooding", an attacker transmits an overwhelming number of frames
with unique Ethernet source address on a switch port. The goal is to
force the switch to evict all useful MAC learning table entries, so that
its behavior degenerates to that of a hub, flooding all traffic. In turn,
that allows an attacker to eavesdrop on the traffic of other hosts attached
to the switch, with all the risks that that entails.
Before this commit, the Open vSwitch "normal" action that implements its
standalone switch behavior (and that can be used by OpenFlow controllers
as well) was vulnerable to MAC flooding attacks. This commit fixes the
problem by implementing per-port fairness for MAC table entries: when
the MAC table is at its maximum size, MAC table eviction always deletes an
entry from the port with the most entries. Thus, MAC entries will never
be evicted from ports with only a few entries if a port with a huge number
of entries exists.
Controllers could introduce their own MAC flooding vulnerabilities into
OVS. For a controller that adds destination MAC based flows to an OpenFlow
flow table as a reaction to "packet-in" events, such a bug, if it exists,
would be in the controller code itself and would need to be fixed in the
controller. For a controller that relies on the Open vSwitch "learn"
action to add destination MAC based flows, Open vSwitch has existing
support for eviction policy similar to that implemented in this commit
through the "groups" column in the Flow_Table table documented in
ovs-vswitchd.conf.db(5); we recommend that users of "learn" not already
familiar with eviction groups to read that documentation.
In addition to implementation of per-port MAC learning fairness,
this commit includes some closely related changes:
- Access to client-provided "port" data in struct mac_entry
is now abstracted through helper functions, which makes it
easier to ensure that the per-port data structures are maintained
consistently.
- The mac_learning_changed() function, which had become trivial,
vestigial, and confusing, was removed. Its functionality was folded
into the new function mac_entry_set_port().
- Many comments were added and improved; there had been a lot of
comment rot in previous versions.
CERT: VU#784996
Reported-by: "Ronny L. Bull - bullrl" <bullrl@clarkson.edu>
Reported-at: http://www.irongeek.com/i.php?page=videos/derbycon4/t314-exploring-layer-2-network-security-in-virtualized-environments-ronny-l-bull-dr-jeanna-n-matthews
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
2015-02-11 23:34:50 -08:00
|
|
|
|
mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
|
2014-06-18 22:14:34 -03:00
|
|
|
|
ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
|
|
|
|
|
|
|
|
|
|
if (mac_port) {
|
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
|
|
|
|
|
if (mac_xbundle && mac_xbundle != in_xbundle) {
|
|
|
|
|
xlate_report(ctx, "forwarding to learned port");
|
|
|
|
|
output_normal(ctx, mac_xbundle, vlan);
|
|
|
|
|
} else if (!mac_xbundle) {
|
|
|
|
|
xlate_report(ctx, "learned port is unknown, dropping");
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "learned port is input port, dropping");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "no learned MAC for destination, flooding");
|
|
|
|
|
xlate_normal_flood(ctx, in_xbundle, vlan);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
/* Appends a "sample" action for sFlow or IPFIX to 'ctx->odp_actions'. The
|
|
|
|
|
* 'probability' is the number of packets out of UINT32_MAX to sample. The
|
|
|
|
|
* 'cookie' (of length 'cookie_size' bytes) is passed back in the callback for
|
|
|
|
|
* each sampled packet. 'tunnel_out_port', if not ODPP_NONE, is added as the
|
|
|
|
|
* OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute. If 'include_actions', an
|
|
|
|
|
* OVS_USERSPACE_ATTR_ACTIONS attribute is added.
|
2013-06-11 13:32:30 -07:00
|
|
|
|
*/
|
|
|
|
|
static size_t
|
2015-07-29 15:24:05 -07:00
|
|
|
|
compose_sample_action(struct xlate_ctx *ctx,
|
2013-06-11 13:32:30 -07:00
|
|
|
|
const uint32_t probability,
|
|
|
|
|
const union user_action_cookie *cookie,
|
2014-08-17 20:19:36 -07:00
|
|
|
|
const size_t cookie_size,
|
2015-07-17 21:37:02 -07:00
|
|
|
|
const odp_port_t tunnel_out_port,
|
|
|
|
|
bool include_actions)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-07-29 15:24:05 -07:00
|
|
|
|
size_t sample_offset = nl_msg_start_nested(ctx->odp_actions,
|
|
|
|
|
OVS_ACTION_ATTR_SAMPLE);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
size_t actions_offset = nl_msg_start_nested(ctx->odp_actions,
|
|
|
|
|
OVS_SAMPLE_ATTR_ACTIONS);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
odp_port_t odp_port = ofp_port_to_odp_port(
|
|
|
|
|
ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
|
|
|
|
|
uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
|
|
|
|
|
flow_hash_5tuple(&ctx->xin->flow, 0));
|
|
|
|
|
int cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
|
|
|
|
|
tunnel_out_port,
|
|
|
|
|
include_actions,
|
|
|
|
|
ctx->odp_actions);
|
2013-07-06 11:46:48 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
nl_msg_end_nested(ctx->odp_actions, actions_offset);
|
|
|
|
|
nl_msg_end_nested(ctx->odp_actions, sample_offset);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
return cookie_offset;
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
/* If sFLow is not enabled, returns 0 without doing anything.
|
|
|
|
|
*
|
|
|
|
|
* If sFlow is enabled, appends a template "sample" action to the ODP actions
|
|
|
|
|
* in 'ctx'. This action is a template because some of the information needed
|
|
|
|
|
* to fill it out is not available until flow translation is complete. In this
|
|
|
|
|
* case, this functions returns an offset, which is always nonzero, to pass
|
|
|
|
|
* later to fix_sflow_action() to fill in the rest of the template. */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static size_t
|
2015-07-29 15:24:05 -07:00
|
|
|
|
compose_sflow_action(struct xlate_ctx *ctx)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-07-29 15:24:05 -07:00
|
|
|
|
struct dpif_sflow *sflow = ctx->xbridge->sflow;
|
|
|
|
|
if (!sflow || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
union user_action_cookie cookie = { .type = USER_ACTION_COOKIE_SFLOW };
|
|
|
|
|
return compose_sample_action(ctx, dpif_sflow_get_probability(sflow),
|
2015-07-17 21:37:02 -07:00
|
|
|
|
&cookie, sizeof cookie.sflow, ODPP_NONE,
|
|
|
|
|
true);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
/* If IPFIX is enabled, this appends a "sample" action to implement IPFIX to
|
|
|
|
|
* 'ctx->odp_actions'. */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
2015-07-29 15:24:05 -07:00
|
|
|
|
compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-07-29 15:24:05 -07:00
|
|
|
|
struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
|
2014-08-17 20:19:36 -07:00
|
|
|
|
odp_port_t tunnel_out_port = ODPP_NONE;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2014-08-17 20:19:36 -07:00
|
|
|
|
/* For input case, output_odp_port is ODPP_NONE, which is an invalid port
|
|
|
|
|
* number. */
|
|
|
|
|
if (output_odp_port == ODPP_NONE &&
|
2015-07-29 15:24:05 -07:00
|
|
|
|
!dpif_ipfix_get_bridge_exporter_input_sampling(ipfix)) {
|
2014-08-17 20:19:36 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* For output case, output_odp_port is valid*/
|
|
|
|
|
if (output_odp_port != ODPP_NONE) {
|
2015-07-29 15:24:05 -07:00
|
|
|
|
if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
|
2014-08-17 20:19:36 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
/* If tunnel sampling is enabled, put an additional option attribute:
|
|
|
|
|
* OVS_USERSPACE_ATTR_TUNNEL_OUT_PORT
|
|
|
|
|
*/
|
2015-07-29 15:24:05 -07:00
|
|
|
|
if (dpif_ipfix_get_bridge_exporter_tunnel_sampling(ipfix) &&
|
|
|
|
|
dpif_ipfix_get_tunnel_port(ipfix, output_odp_port) ) {
|
2014-08-17 20:19:36 -07:00
|
|
|
|
tunnel_out_port = output_odp_port;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
union user_action_cookie cookie = {
|
|
|
|
|
.ipfix = {
|
|
|
|
|
.type = USER_ACTION_COOKIE_IPFIX,
|
|
|
|
|
.output_odp_port = output_odp_port,
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
compose_sample_action(ctx,
|
|
|
|
|
dpif_ipfix_get_bridge_exporter_probability(ipfix),
|
2015-07-17 21:37:02 -07:00
|
|
|
|
&cookie, sizeof cookie.ipfix, tunnel_out_port,
|
|
|
|
|
false);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
/* Fix "sample" action according to data collected while composing ODP actions,
|
|
|
|
|
* as described in compose_sflow_action().
|
|
|
|
|
*
|
|
|
|
|
* 'user_cookie_offset' must be the offset returned by add_sflow_action(). */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
2015-07-29 15:24:05 -07:00
|
|
|
|
fix_sflow_action(struct xlate_ctx *ctx, unsigned int user_cookie_offset)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
|
|
|
|
const struct flow *base = &ctx->base_flow;
|
|
|
|
|
union user_action_cookie *cookie;
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
cookie = ofpbuf_at(ctx->odp_actions, user_cookie_offset,
|
2013-06-11 13:32:30 -07:00
|
|
|
|
sizeof cookie->sflow);
|
|
|
|
|
ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
|
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
cookie->type = USER_ACTION_COOKIE_SFLOW;
|
|
|
|
|
cookie->sflow.vlan_tci = base->vlan_tci;
|
|
|
|
|
|
|
|
|
|
/* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
|
|
|
|
|
* port information") for the interpretation of cookie->output. */
|
|
|
|
|
switch (ctx->sflow_n_outputs) {
|
|
|
|
|
case 0:
|
|
|
|
|
/* 0x40000000 | 256 means "packet dropped for unknown reason". */
|
|
|
|
|
cookie->sflow.output = 0x40000000 | 256;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
cookie->sflow.output = dpif_sflow_odp_port_to_ifindex(
|
|
|
|
|
ctx->xbridge->sflow, ctx->sflow_odp_port);
|
|
|
|
|
if (cookie->sflow.output) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Fall through. */
|
|
|
|
|
default:
|
|
|
|
|
/* 0x80000000 means "multiple output ports. */
|
|
|
|
|
cookie->sflow.output = 0x80000000 | ctx->sflow_n_outputs;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-23 13:39:38 -07:00
|
|
|
|
static bool
|
|
|
|
|
process_special(struct xlate_ctx *ctx, const struct xport *xport)
|
2013-06-17 17:56:54 -07:00
|
|
|
|
{
|
2015-07-23 13:39:38 -07:00
|
|
|
|
const struct flow *flow = &ctx->xin->flow;
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct xbridge *xbridge = ctx->xbridge;
|
2015-07-23 13:39:38 -07:00
|
|
|
|
const struct dp_packet *packet = ctx->xin->packet;
|
|
|
|
|
enum slow_path_reason slow;
|
2013-06-17 18:07:33 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!xport) {
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = 0;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
} else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
|
2013-06-17 17:56:54 -07:00
|
|
|
|
if (packet) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
cfm_process_heartbeat(xport->cfm, packet);
|
2013-06-17 17:56:54 -07:00
|
|
|
|
}
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = SLOW_CFM;
|
2013-07-16 09:58:42 +00:00
|
|
|
|
} else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) {
|
2013-06-17 17:56:54 -07:00
|
|
|
|
if (packet) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
bfd_process_packet(xport->bfd, flow, packet);
|
2013-12-20 14:53:52 -08:00
|
|
|
|
/* If POLL received, immediately sends FINAL back. */
|
|
|
|
|
if (bfd_should_send_packet(xport->bfd)) {
|
2014-04-03 18:31:13 -07:00
|
|
|
|
ofproto_dpif_monitor_port_send_soon(xport->ofport);
|
2013-12-20 14:53:52 -08:00
|
|
|
|
}
|
2013-06-17 17:56:54 -07:00
|
|
|
|
}
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = SLOW_BFD;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
} else if (xport->xbundle && xport->xbundle->lacp
|
2013-06-17 17:56:54 -07:00
|
|
|
|
&& flow->dl_type == htons(ETH_TYPE_LACP)) {
|
|
|
|
|
if (packet) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
|
2013-06-17 17:56:54 -07:00
|
|
|
|
}
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = SLOW_LACP;
|
2014-08-22 09:01:34 -07:00
|
|
|
|
} else if ((xbridge->stp || xbridge->rstp) &&
|
|
|
|
|
stp_should_process_flow(flow, wc)) {
|
2013-06-17 17:56:54 -07:00
|
|
|
|
if (packet) {
|
2014-09-09 11:11:18 -07:00
|
|
|
|
xbridge->stp
|
|
|
|
|
? stp_process_packet(xport, packet)
|
|
|
|
|
: rstp_process_packet(xport, packet);
|
2013-06-17 17:56:54 -07:00
|
|
|
|
}
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = SLOW_STP;
|
2015-04-15 11:53:56 -04:00
|
|
|
|
} else if (xport->lldp && lldp_should_process_flow(xport->lldp, flow)) {
|
2015-02-20 14:17:10 -05:00
|
|
|
|
if (packet) {
|
|
|
|
|
lldp_process_packet(xport->lldp, packet);
|
|
|
|
|
}
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = SLOW_LLDP;
|
2013-06-17 17:56:54 -07:00
|
|
|
|
} else {
|
2015-07-23 13:39:38 -07:00
|
|
|
|
slow = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (slow) {
|
|
|
|
|
ctx->xout->slow |= slow;
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
2013-06-17 17:56:54 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
|
static int
|
|
|
|
|
tnl_route_lookup_flow(const struct flow *oflow,
|
2016-03-24 09:30:57 -07:00
|
|
|
|
struct in6_addr *ip, struct in6_addr *src,
|
|
|
|
|
struct xport **out_port)
|
2014-11-11 11:53:47 -08:00
|
|
|
|
{
|
|
|
|
|
char out_dev[IFNAMSIZ];
|
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
struct xlate_cfg *xcfg;
|
2015-12-04 12:36:48 -02:00
|
|
|
|
struct in6_addr gw;
|
|
|
|
|
struct in6_addr dst;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
dst = flow_tnl_dst(&oflow->tunnel);
|
2016-03-24 09:30:57 -07:00
|
|
|
|
if (!ovs_router_lookup(&dst, out_dev, src, &gw)) {
|
2014-11-11 11:53:47 -08:00
|
|
|
|
return -ENOENT;
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
if (ipv6_addr_is_set(&gw) &&
|
|
|
|
|
(!IN6_IS_ADDR_V4MAPPED(&gw) || in6_addr_get_mapped_ipv4(&gw))) {
|
2014-11-11 11:53:47 -08:00
|
|
|
|
*ip = gw;
|
|
|
|
|
} else {
|
2015-12-04 12:36:48 -02:00
|
|
|
|
*ip = dst;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
|
|
|
|
ovs_assert(xcfg);
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
|
|
|
|
|
if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
|
|
|
|
|
struct xport *port;
|
|
|
|
|
|
|
|
|
|
HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
|
|
|
|
|
if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
|
|
|
|
|
*out_port = port;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return -ENOENT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2015-09-15 19:37:06 -07:00
|
|
|
|
compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev,
|
|
|
|
|
struct dp_packet *packet)
|
2014-11-11 11:53:47 -08:00
|
|
|
|
{
|
2015-09-15 19:37:06 -07:00
|
|
|
|
struct xbridge *xbridge = out_dev->xbridge;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
struct ofpact_output output;
|
|
|
|
|
struct flow flow;
|
|
|
|
|
|
|
|
|
|
ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
|
2015-02-22 03:21:09 -08:00
|
|
|
|
flow_extract(packet, &flow);
|
2015-09-15 19:37:06 -07:00
|
|
|
|
flow.in_port.ofp_port = out_dev->ofp_port;
|
|
|
|
|
output.port = OFPP_TABLE;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
output.max_len = 0;
|
|
|
|
|
|
2015-09-15 19:37:06 -07:00
|
|
|
|
return ofproto_dpif_execute_actions__(xbridge->ofproto, &flow, NULL,
|
|
|
|
|
&output.ofpact, sizeof output,
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
ctx->indentation, ctx->depth,
|
|
|
|
|
ctx->resubmits, packet);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
static void
|
|
|
|
|
tnl_send_nd_request(struct xlate_ctx *ctx, const struct xport *out_dev,
|
|
|
|
|
const struct eth_addr eth_src,
|
|
|
|
|
struct in6_addr * ipv6_src, struct in6_addr * ipv6_dst)
|
|
|
|
|
{
|
|
|
|
|
struct dp_packet packet;
|
|
|
|
|
|
|
|
|
|
dp_packet_init(&packet, 0);
|
|
|
|
|
compose_nd(&packet, eth_src, ipv6_src, ipv6_dst);
|
|
|
|
|
compose_table_xlate(ctx, out_dev, &packet);
|
|
|
|
|
dp_packet_uninit(&packet);
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
|
static void
|
2015-09-15 19:37:06 -07:00
|
|
|
|
tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev,
|
2015-08-28 14:55:11 -07:00
|
|
|
|
const struct eth_addr eth_src,
|
2014-11-11 11:53:47 -08:00
|
|
|
|
ovs_be32 ip_src, ovs_be32 ip_dst)
|
|
|
|
|
{
|
2015-02-22 03:21:09 -08:00
|
|
|
|
struct dp_packet packet;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
2015-02-22 03:21:09 -08:00
|
|
|
|
dp_packet_init(&packet, 0);
|
2015-06-14 11:03:23 -07:00
|
|
|
|
compose_arp(&packet, ARP_OP_REQUEST,
|
|
|
|
|
eth_src, eth_addr_zero, true, ip_src, ip_dst);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
2015-09-15 19:37:06 -07:00
|
|
|
|
compose_table_xlate(ctx, out_dev, &packet);
|
2015-02-22 03:21:09 -08:00
|
|
|
|
dp_packet_uninit(&packet);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
2015-05-10 00:05:23 -07:00
|
|
|
|
build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
|
2014-11-11 11:53:47 -08:00
|
|
|
|
const struct flow *flow, odp_port_t tunnel_odp_port)
|
|
|
|
|
{
|
2016-05-23 20:27:14 -07:00
|
|
|
|
struct netdev_tnl_build_header_params tnl_params;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
struct ovs_action_push_tnl tnl_push_data;
|
|
|
|
|
struct xport *out_dev = NULL;
|
2015-12-04 12:36:48 -02:00
|
|
|
|
ovs_be32 s_ip = 0, d_ip = 0;
|
|
|
|
|
struct in6_addr s_ip6 = in6addr_any;
|
|
|
|
|
struct in6_addr d_ip6 = in6addr_any;
|
2015-08-28 14:55:11 -07:00
|
|
|
|
struct eth_addr smac;
|
|
|
|
|
struct eth_addr dmac;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
int err;
|
2015-12-04 12:36:48 -02:00
|
|
|
|
char buf_sip6[INET6_ADDRSTRLEN];
|
|
|
|
|
char buf_dip6[INET6_ADDRSTRLEN];
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
2016-03-24 09:30:57 -07:00
|
|
|
|
err = tnl_route_lookup_flow(flow, &d_ip6, &s_ip6, &out_dev);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (err) {
|
2015-05-10 00:05:23 -07:00
|
|
|
|
xlate_report(ctx, "native tunnel routing failed");
|
2014-11-11 11:53:47 -08:00
|
|
|
|
return err;
|
|
|
|
|
}
|
2015-12-04 12:36:48 -02:00
|
|
|
|
|
|
|
|
|
xlate_report(ctx, "tunneling to %s via %s",
|
|
|
|
|
ipv6_string_mapped(buf_dip6, &d_ip6),
|
|
|
|
|
netdev_get_name(out_dev->netdev));
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
|
|
|
|
/* Use mac addr of bridge port of the peer. */
|
2015-08-28 14:55:11 -07:00
|
|
|
|
err = netdev_get_etheraddr(out_dev->netdev, &smac);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (err) {
|
2015-05-10 00:05:23 -07:00
|
|
|
|
xlate_report(ctx, "tunnel output device lacks Ethernet address");
|
2014-11-11 11:53:47 -08:00
|
|
|
|
return err;
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
d_ip = in6_addr_get_mapped_ipv4(&d_ip6);
|
|
|
|
|
if (d_ip) {
|
2016-03-24 09:30:57 -07:00
|
|
|
|
s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (err) {
|
2015-12-04 12:36:48 -02:00
|
|
|
|
xlate_report(ctx, "neighbor cache miss for %s on bridge %s, "
|
|
|
|
|
"sending %s request",
|
|
|
|
|
buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
|
|
|
|
|
if (d_ip) {
|
|
|
|
|
tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip);
|
|
|
|
|
} else {
|
|
|
|
|
tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6);
|
|
|
|
|
}
|
2014-11-11 11:53:47 -08:00
|
|
|
|
return err;
|
|
|
|
|
}
|
2015-12-04 12:36:48 -02:00
|
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
2015-11-30 16:24:49 -02:00
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_NEIGH);
|
|
|
|
|
ovs_strlcpy(entry->u.tnl_neigh_cache.br_name, out_dev->xbridge->name,
|
|
|
|
|
sizeof entry->u.tnl_neigh_cache.br_name);
|
2015-12-04 12:36:48 -02:00
|
|
|
|
entry->u.tnl_neigh_cache.d_ipv6 = d_ip6;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
2015-05-10 00:05:23 -07:00
|
|
|
|
|
2015-12-04 12:36:48 -02:00
|
|
|
|
xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" %s"
|
|
|
|
|
" to "ETH_ADDR_FMT" %s",
|
|
|
|
|
ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6),
|
|
|
|
|
ETH_ADDR_ARGS(dmac), buf_dip6);
|
|
|
|
|
|
2016-05-23 20:27:14 -07:00
|
|
|
|
netdev_init_tnl_build_header_params(&tnl_params, flow, &s_ip6, dmac, smac);
|
|
|
|
|
err = tnl_port_build_header(xport->ofport, &tnl_push_data, &tnl_params);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (err) {
|
|
|
|
|
return err;
|
|
|
|
|
}
|
|
|
|
|
tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
|
|
|
|
|
tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
|
2015-07-31 13:34:16 -07:00
|
|
|
|
odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-09 19:00:17 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_commit_actions(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
bool use_masked = ctx->xbridge->support.masked_set_action;
|
|
|
|
|
|
|
|
|
|
ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
|
|
|
|
|
ctx->odp_actions, ctx->wc,
|
|
|
|
|
use_masked);
|
|
|
|
|
}
|
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
static void
|
|
|
|
|
clear_conntrack(struct flow *flow)
|
|
|
|
|
{
|
|
|
|
|
flow->ct_state = 0;
|
|
|
|
|
flow->ct_zone = 0;
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
flow->ct_mark = 0;
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
memset(&flow->ct_label, 0, sizeof flow->ct_label);
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initally:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
2013-06-19 16:58:44 -07:00
|
|
|
|
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
|
2015-03-12 13:02:07 -07:00
|
|
|
|
const struct xlate_bond_recirc *xr, bool check_stp)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
struct flow_tnl flow_tnl;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
ovs_be16 flow_vlan_tci;
|
2013-08-06 12:57:13 -07:00
|
|
|
|
uint32_t flow_pkt_mark;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint8_t flow_nw_tos;
|
2013-06-19 16:58:44 -07:00
|
|
|
|
odp_port_t out_port, odp_port;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
bool tnl_push_pop_send = false;
|
2013-06-12 15:01:11 -07:00
|
|
|
|
uint8_t dscp;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
/* If 'struct flow' gets additional metadata, we'll need to zero it out
|
|
|
|
|
* before traversing a patch port. */
|
2015-11-25 11:31:11 -02:00
|
|
|
|
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 35);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
memset(&flow_tnl, 0, sizeof flow_tnl);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (!xport) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "Nonexistent output port");
|
|
|
|
|
return;
|
2013-06-13 18:38:24 -07:00
|
|
|
|
} else if (xport->config & OFPUTIL_PC_NO_FWD) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
|
|
|
|
|
return;
|
2014-03-09 17:48:52 +08:00
|
|
|
|
} else if (check_stp) {
|
2014-04-24 13:18:18 -07:00
|
|
|
|
if (is_stp(&ctx->base_flow)) {
|
2014-08-22 09:01:34 -07:00
|
|
|
|
if (!xport_stp_should_forward_bpdu(xport) &&
|
|
|
|
|
!xport_rstp_should_manage_bpdu(xport)) {
|
|
|
|
|
if (ctx->xbridge->stp != NULL) {
|
|
|
|
|
xlate_report(ctx, "STP not in listening state, "
|
|
|
|
|
"skipping bpdu output");
|
|
|
|
|
} else if (ctx->xbridge->rstp != NULL) {
|
|
|
|
|
xlate_report(ctx, "RSTP not managing BPDU in this state, "
|
|
|
|
|
"skipping bpdu output");
|
|
|
|
|
}
|
2014-03-09 17:48:52 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
2014-08-22 09:01:34 -07:00
|
|
|
|
} else if (!xport_stp_forward_state(xport) ||
|
|
|
|
|
!xport_rstp_forward_state(xport)) {
|
|
|
|
|
if (ctx->xbridge->stp != NULL) {
|
|
|
|
|
xlate_report(ctx, "STP not in forwarding state, "
|
|
|
|
|
"skipping output");
|
|
|
|
|
} else if (ctx->xbridge->rstp != NULL) {
|
|
|
|
|
xlate_report(ctx, "RSTP not in forwarding state, "
|
|
|
|
|
"skipping output");
|
|
|
|
|
}
|
2014-03-09 17:48:52 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport->peer) {
|
|
|
|
|
const struct xport *peer = xport->peer;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
struct flow old_flow = ctx->xin->flow;
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
bool old_conntrack = ctx->conntracked;
|
2016-05-25 10:34:31 +09:00
|
|
|
|
bool old_was_mpls = ctx->was_mpls;
|
2015-06-12 16:12:56 -07:00
|
|
|
|
cls_version_t old_version = ctx->tables_version;
|
2015-03-16 17:33:16 -07:00
|
|
|
|
struct ofpbuf old_stack = ctx->stack;
|
|
|
|
|
union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
|
2015-03-11 18:01:51 -07:00
|
|
|
|
struct ofpbuf old_action_set = ctx->action_set;
|
|
|
|
|
uint64_t actset_stub[1024 / 8];
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-03-16 17:33:16 -07:00
|
|
|
|
ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
|
2015-03-11 18:01:51 -07:00
|
|
|
|
ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
ctx->xbridge = peer->xbridge;
|
|
|
|
|
flow->in_port.ofp_port = peer->ofp_port;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->metadata = htonll(0);
|
|
|
|
|
memset(&flow->tunnel, 0, sizeof flow->tunnel);
|
|
|
|
|
memset(flow->regs, 0, sizeof flow->regs);
|
2014-11-03 14:24:01 -08:00
|
|
|
|
flow->actset_output = OFPP_UNSET;
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
ctx->conntracked = false;
|
|
|
|
|
clear_conntrack(flow);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-06-11 15:53:43 -07:00
|
|
|
|
/* The bridge is now known so obtain its table version. */
|
|
|
|
|
ctx->tables_version
|
|
|
|
|
= ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
|
|
|
|
|
|
2015-07-23 13:39:38 -07:00
|
|
|
|
if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
|
2014-08-22 09:01:34 -07:00
|
|
|
|
if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!ctx->freezing) {
|
2016-01-28 17:11:19 -08:00
|
|
|
|
xlate_action_set(ctx);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (ctx->freezing) {
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
finish_freezing(ctx);
|
2015-03-11 18:01:51 -07:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
2014-08-22 09:01:34 -07:00
|
|
|
|
/* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and
|
|
|
|
|
* the learning action look at the packet, then drop it. */
|
2013-06-11 13:32:30 -07:00
|
|
|
|
struct flow old_base_flow = ctx->base_flow;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
size_t old_size = ctx->odp_actions->size;
|
2015-07-23 17:08:14 -07:00
|
|
|
|
mirror_mask_t old_mirrors = ctx->mirrors;
|
2015-03-12 09:47:31 -07:00
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
|
2015-07-23 17:08:14 -07:00
|
|
|
|
ctx->mirrors = old_mirrors;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
ctx->base_flow = old_base_flow;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ctx->odp_actions->size = old_size;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Undo changes that may have been done for freezing. */
|
|
|
|
|
ctx_cancel_freeze(ctx);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ctx->xin->flow = old_flow;
|
2013-08-08 11:08:03 -07:00
|
|
|
|
ctx->xbridge = xport->xbridge;
|
2015-03-11 18:01:51 -07:00
|
|
|
|
ofpbuf_uninit(&ctx->action_set);
|
|
|
|
|
ctx->action_set = old_action_set;
|
2015-03-16 17:33:16 -07:00
|
|
|
|
ofpbuf_uninit(&ctx->stack);
|
|
|
|
|
ctx->stack = old_stack;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-06-11 15:53:43 -07:00
|
|
|
|
/* Restore calling bridge's lookup version. */
|
|
|
|
|
ctx->tables_version = old_version;
|
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
/* The peer bridge popping MPLS should have no effect on the original
|
|
|
|
|
* bridge. */
|
|
|
|
|
ctx->was_mpls = old_was_mpls;
|
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
/* The peer bridge's conntrack execution should have no effect on the
|
|
|
|
|
* original bridge. */
|
|
|
|
|
ctx->conntracked = old_conntrack;
|
|
|
|
|
|
2015-03-12 09:47:31 -07:00
|
|
|
|
/* The fact that the peer bridge exits (for any reason) does not mean
|
|
|
|
|
* that the original bridge should exit. Specifically, if the peer
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* bridge freezes translation, the original bridge must continue
|
|
|
|
|
* processing with the original, not the frozen packet! */
|
2015-03-12 09:47:31 -07:00
|
|
|
|
ctx->exit = false;
|
|
|
|
|
|
2015-11-25 15:19:37 -08:00
|
|
|
|
/* Peer bridge errors do not propagate back. */
|
|
|
|
|
ctx->error = XLATE_OK;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
|
|
|
|
|
netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats);
|
2013-12-09 17:34:53 -08:00
|
|
|
|
if (peer->bfd) {
|
|
|
|
|
bfd_account_rx(peer->bfd, ctx->xin->resubmit_stats);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
|
|
|
|
|
entry->u.dev.tx = netdev_ref(xport->netdev);
|
|
|
|
|
entry->u.dev.rx = netdev_ref(peer->netdev);
|
|
|
|
|
entry->u.dev.bfd = bfd_ref(peer->bfd);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow_vlan_tci = flow->vlan_tci;
|
2013-08-06 12:57:13 -07:00
|
|
|
|
flow_pkt_mark = flow->pkt_mark;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow_nw_tos = flow->nw_tos;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2014-09-17 15:01:48 -07:00
|
|
|
|
if (count_skb_priorities(xport)) {
|
|
|
|
|
memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
|
|
|
|
|
if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
|
|
|
|
|
wc->masks.nw_tos |= IP_DSCP_MASK;
|
|
|
|
|
flow->nw_tos &= ~IP_DSCP_MASK;
|
|
|
|
|
flow->nw_tos |= dscp;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
if (xport->is_tunnel) {
|
2015-12-04 12:36:48 -02:00
|
|
|
|
struct in6_addr dst;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* Save tunnel metadata so that changes made due to
|
|
|
|
|
* the Logical (tunnel) Port are not visible for any further
|
|
|
|
|
* matches, while explicit set actions on tunnel metadata are.
|
|
|
|
|
*/
|
2014-11-11 11:53:47 -08:00
|
|
|
|
flow_tnl = flow->tunnel;
|
2015-07-31 13:15:52 -07:00
|
|
|
|
odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
|
2013-06-19 16:58:44 -07:00
|
|
|
|
if (odp_port == ODPP_NONE) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "Tunneling decided against output");
|
|
|
|
|
goto out; /* restore flow_nw_tos */
|
|
|
|
|
}
|
2015-12-04 12:36:48 -02:00
|
|
|
|
dst = flow_tnl_dst(&flow->tunnel);
|
|
|
|
|
if (ipv6_addr_equals(&dst, &ctx->orig_tunnel_ipv6_dst)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_report(ctx, "Not tunneling to our own address");
|
|
|
|
|
goto out; /* restore flow_nw_tos */
|
|
|
|
|
}
|
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
|
|
|
|
|
entry->u.dev.tx = netdev_ref(xport->netdev);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
out_port = odp_port;
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
|
2015-05-10 00:05:23 -07:00
|
|
|
|
xlate_report(ctx, "output to native tunnel");
|
2014-11-11 11:53:47 -08:00
|
|
|
|
tnl_push_pop_send = true;
|
|
|
|
|
} else {
|
2015-05-10 00:05:23 -07:00
|
|
|
|
xlate_report(ctx, "output to kernel tunnel");
|
2015-07-31 13:34:16 -07:00
|
|
|
|
commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
flow->tunnel = flow_tnl; /* Restore tunnel metadata */
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
2013-06-13 18:38:24 -07:00
|
|
|
|
odp_port = xport->odp_port;
|
2014-02-07 11:34:01 -08:00
|
|
|
|
out_port = odp_port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-19 16:58:44 -07:00
|
|
|
|
if (out_port != ODPP_NONE) {
|
2015-09-09 19:00:17 -07:00
|
|
|
|
xlate_commit_actions(ctx);
|
2014-03-05 15:27:31 -08:00
|
|
|
|
|
2015-03-12 13:02:07 -07:00
|
|
|
|
if (xr) {
|
2014-04-08 18:42:39 -07:00
|
|
|
|
struct ovs_action_hash *act_hash;
|
2014-03-05 15:27:31 -08:00
|
|
|
|
|
2014-04-08 18:42:39 -07:00
|
|
|
|
/* Hash action. */
|
2015-07-31 13:34:16 -07:00
|
|
|
|
act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
|
2014-04-08 18:42:39 -07:00
|
|
|
|
OVS_ACTION_ATTR_HASH,
|
|
|
|
|
sizeof *act_hash);
|
|
|
|
|
act_hash->hash_alg = xr->hash_alg;
|
2014-04-17 20:06:58 -07:00
|
|
|
|
act_hash->hash_basis = xr->hash_basis;
|
2014-04-08 18:42:39 -07:00
|
|
|
|
|
|
|
|
|
/* Recirc action. */
|
2015-07-31 13:34:16 -07:00
|
|
|
|
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
|
2014-04-08 18:42:39 -07:00
|
|
|
|
xr->recirc_id);
|
2014-03-05 15:27:31 -08:00
|
|
|
|
} else {
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
|
|
|
|
if (tnl_push_pop_send) {
|
|
|
|
|
build_tunnel_send(ctx, xport, flow, odp_port);
|
|
|
|
|
flow->tunnel = flow_tnl; /* Restore tunnel metadata */
|
|
|
|
|
} else {
|
|
|
|
|
odp_port_t odp_tnl_port = ODPP_NONE;
|
|
|
|
|
|
|
|
|
|
/* XXX: Write better Filter for tunnel port. We can use inport
|
|
|
|
|
* int tunnel-port flow to avoid these checks completely. */
|
|
|
|
|
if (ofp_port == OFPP_LOCAL &&
|
|
|
|
|
ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
|
|
|
|
|
|
|
|
|
|
odp_tnl_port = tnl_port_map_lookup(flow, wc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (odp_tnl_port != ODPP_NONE) {
|
2015-07-31 13:34:16 -07:00
|
|
|
|
nl_msg_put_odp_port(ctx->odp_actions,
|
2014-11-11 11:53:47 -08:00
|
|
|
|
OVS_ACTION_ATTR_TUNNEL_POP,
|
|
|
|
|
odp_tnl_port);
|
|
|
|
|
} else {
|
|
|
|
|
/* Tunnel push-pop action is not compatible with
|
|
|
|
|
* IPFIX action. */
|
2015-07-29 15:24:05 -07:00
|
|
|
|
compose_ipfix_action(ctx, out_port);
|
2015-07-31 13:34:16 -07:00
|
|
|
|
nl_msg_put_odp_port(ctx->odp_actions,
|
2014-11-11 11:53:47 -08:00
|
|
|
|
OVS_ACTION_ATTR_OUTPUT,
|
|
|
|
|
out_port);
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-03-05 15:27:31 -08:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-17 14:04:36 -07:00
|
|
|
|
ctx->sflow_odp_port = odp_port;
|
|
|
|
|
ctx->sflow_n_outputs++;
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface = ofp_port;
|
2013-06-17 14:04:36 -07:00
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) {
|
|
|
|
|
mirror_packet(ctx, xport->xbundle,
|
|
|
|
|
xbundle_mirror_dst(xport->xbundle->xbridge,
|
|
|
|
|
xport->xbundle));
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-17 14:04:36 -07:00
|
|
|
|
out:
|
2013-06-11 13:32:30 -07:00
|
|
|
|
/* Restore flow */
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->vlan_tci = flow_vlan_tci;
|
2013-08-06 12:57:13 -07:00
|
|
|
|
flow->pkt_mark = flow_pkt_mark;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->nw_tos = flow_nw_tos;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port,
|
|
|
|
|
const struct xlate_bond_recirc *xr)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action__(ctx, ofp_port, xr, true);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-08-23 11:03:55 -07:00
|
|
|
|
static void
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule, bool deepens)
|
2013-08-23 11:03:55 -07:00
|
|
|
|
{
|
|
|
|
|
struct rule_dpif *old_rule = ctx->rule;
|
2015-03-13 13:27:19 -07:00
|
|
|
|
ovs_be64 old_cookie = ctx->rule_cookie;
|
2014-04-29 15:50:38 -07:00
|
|
|
|
const struct rule_actions *actions;
|
2013-08-23 11:03:55 -07:00
|
|
|
|
|
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
2013-08-27 13:17:11 -07:00
|
|
|
|
rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
|
2013-08-23 11:03:55 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-04 08:47:16 -07:00
|
|
|
|
ctx->resubmits++;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->indentation++;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
ctx->depth += deepens;
|
2013-08-23 11:03:55 -07:00
|
|
|
|
ctx->rule = rule;
|
2015-03-13 13:27:19 -07:00
|
|
|
|
ctx->rule_cookie = rule_dpif_get_flow_cookie(rule);
|
2013-09-09 13:05:52 -07:00
|
|
|
|
actions = rule_dpif_get_actions(rule);
|
|
|
|
|
do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx);
|
2015-03-13 13:27:19 -07:00
|
|
|
|
ctx->rule_cookie = old_cookie;
|
2013-08-23 11:03:55 -07:00
|
|
|
|
ctx->rule = old_rule;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
ctx->depth -= deepens;
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->indentation--;
|
2013-08-23 11:03:55 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 18:17:13 +09:00
|
|
|
|
static bool
|
|
|
|
|
xlate_resubmit_resource_check(struct xlate_ctx *ctx)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
if (ctx->depth >= MAX_DEPTH) {
|
|
|
|
|
XLATE_REPORT_ERROR(ctx, "over max translation depth %d", MAX_DEPTH);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx->error = XLATE_RECURSION_TOO_DEEP;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
} else if (ctx->resubmits >= MAX_RESUBMITS) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
XLATE_REPORT_ERROR(ctx, "over %d resubmit actions", MAX_RESUBMITS);
|
|
|
|
|
ctx->error = XLATE_TOO_MANY_RESUBMITS;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
} else if (ctx->odp_actions->size > UINT16_MAX) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of actions");
|
|
|
|
|
/* NOT an error, as we'll be slow-pathing the flow in this case? */
|
|
|
|
|
ctx->exit = true; /* XXX: translation still terminated! */
|
2015-03-02 17:29:44 -08:00
|
|
|
|
} else if (ctx->stack.size >= 65536) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of stack");
|
|
|
|
|
ctx->error = XLATE_STACK_TOO_DEEP;
|
2013-10-04 08:47:16 -07:00
|
|
|
|
} else {
|
2013-10-30 18:17:13 +09:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2014-03-20 13:42:22 -07:00
|
|
|
|
xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
|
|
|
|
|
bool may_packet_in, bool honor_table_miss)
|
2013-10-30 18:17:13 +09:00
|
|
|
|
{
|
2016-05-25 10:34:31 +09:00
|
|
|
|
/* Check if we need to recirculate before matching in a table. */
|
|
|
|
|
if (ctx->was_mpls) {
|
|
|
|
|
ctx_trigger_freeze(ctx);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2013-10-30 18:17:13 +09:00
|
|
|
|
if (xlate_resubmit_resource_check(ctx)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint8_t old_table_id = ctx->table_id;
|
2013-12-05 13:09:27 -08:00
|
|
|
|
struct rule_dpif *rule;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
ctx->table_id = table_id;
|
|
|
|
|
|
2014-11-10 13:14:29 -08:00
|
|
|
|
rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
|
2015-06-11 15:53:43 -07:00
|
|
|
|
ctx->tables_version,
|
2016-04-22 17:45:03 -07:00
|
|
|
|
&ctx->xin->flow, ctx->wc,
|
2014-11-10 13:14:29 -08:00
|
|
|
|
ctx->xin->resubmit_stats,
|
|
|
|
|
&ctx->table_id, in_port,
|
|
|
|
|
may_packet_in, honor_table_miss);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2014-11-04 11:17:11 -08:00
|
|
|
|
if (OVS_UNLIKELY(ctx->xin->resubmit_hook)) {
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->xin->resubmit_hook(ctx->xin, rule, ctx->indentation + 1);
|
2013-07-17 16:14:02 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-11 23:23:00 -07:00
|
|
|
|
if (rule) {
|
2014-04-24 08:21:49 -07:00
|
|
|
|
/* Fill in the cache entry here instead of xlate_recursively
|
|
|
|
|
* to make the reference counting more explicit. We take a
|
|
|
|
|
* reference in the lookups above if we are going to cache the
|
|
|
|
|
* rule. */
|
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_RULE);
|
|
|
|
|
entry->u.rule = rule;
|
2015-08-02 11:51:32 -07:00
|
|
|
|
rule_dpif_ref(rule);
|
2014-04-24 08:21:49 -07:00
|
|
|
|
}
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
xlate_recursively(ctx, rule, table_id <= old_table_id);
|
2013-07-17 16:14:02 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
ctx->table_id = old_table_id;
|
2013-10-04 08:47:16 -07:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 18:17:14 +09:00
|
|
|
|
static void
|
2014-05-22 10:47:13 +00:00
|
|
|
|
xlate_group_stats(struct xlate_ctx *ctx, struct group_dpif *group,
|
|
|
|
|
struct ofputil_bucket *bucket)
|
|
|
|
|
{
|
|
|
|
|
if (ctx->xin->resubmit_stats) {
|
|
|
|
|
group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
|
|
|
|
|
}
|
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
|
|
|
|
|
entry->u.group.group = group_dpif_ref(group);
|
|
|
|
|
entry->u.group.bucket = bucket;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
|
2013-10-30 18:17:14 +09:00
|
|
|
|
{
|
|
|
|
|
uint64_t action_list_stub[1024 / 8];
|
2016-02-18 15:13:09 -08:00
|
|
|
|
struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub);
|
|
|
|
|
struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
|
|
|
|
|
bucket->ofpacts_len);
|
2015-03-19 15:20:21 -07:00
|
|
|
|
struct flow old_flow = ctx->xin->flow;
|
2016-05-25 10:34:31 +09:00
|
|
|
|
bool old_was_mpls = ctx->was_mpls;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
|
|
|
|
|
ofpacts_execute_action_set(&action_list, &action_set);
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->indentation++;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
ctx->depth++;
|
2015-03-02 17:29:44 -08:00
|
|
|
|
do_xlate_actions(action_list.data, action_list.size, ctx);
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
ctx->depth--;
|
2016-04-21 10:50:16 -07:00
|
|
|
|
ctx->indentation--;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
|
|
|
|
|
ofpbuf_uninit(&action_list);
|
2015-03-19 15:20:21 -07:00
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
/* Check if need to freeze. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (ctx->freezing) {
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
finish_freezing(ctx);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-03-19 15:20:21 -07:00
|
|
|
|
/* Roll back flow to previous state.
|
|
|
|
|
* This is equivalent to cloning the packet for each bucket.
|
|
|
|
|
*
|
|
|
|
|
* As a side effect any subsequently applied actions will
|
|
|
|
|
* also effectively be applied to a clone of the packet taken
|
|
|
|
|
* just before applying the all or indirect group.
|
|
|
|
|
*
|
|
|
|
|
* Note that group buckets are action sets, hence they cannot modify the
|
|
|
|
|
* main action set. Also any stack actions are ignored when executing an
|
|
|
|
|
* action set, so group buckets cannot change the stack either.
|
|
|
|
|
* However, we do allow resubmit actions in group buckets, which could
|
|
|
|
|
* break the above assumptions. It is up to the controller to not mess up
|
|
|
|
|
* with the action_set and stack in the tables resubmitted to from
|
|
|
|
|
* group buckets. */
|
|
|
|
|
ctx->xin->flow = old_flow;
|
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
/* The group bucket popping MPLS should have no effect after bucket
|
|
|
|
|
* execution. */
|
|
|
|
|
ctx->was_mpls = old_was_mpls;
|
|
|
|
|
|
2015-03-19 15:20:21 -07:00
|
|
|
|
/* The fact that the group bucket exits (for any reason) does not mean that
|
|
|
|
|
* the translation after the group action should exit. Specifically, if
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* the group bucket freezes translation, the actions after the group action
|
|
|
|
|
* must continue processing with the original, not the frozen packet! */
|
2015-03-19 15:20:21 -07:00
|
|
|
|
ctx->exit = false;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
|
|
|
|
|
{
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct ofputil_bucket *bucket;
|
2014-12-15 14:10:38 +01:00
|
|
|
|
const struct ovs_list *buckets;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
|
|
|
|
|
group_dpif_get_buckets(group, &buckets);
|
|
|
|
|
|
|
|
|
|
LIST_FOR_EACH (bucket, list_node, buckets) {
|
|
|
|
|
xlate_group_bucket(ctx, bucket);
|
|
|
|
|
}
|
2014-05-22 10:47:13 +00:00
|
|
|
|
xlate_group_stats(ctx, group, NULL);
|
2013-10-30 18:17:14 +09:00
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 18:17:18 +09:00
|
|
|
|
/* Translates a fast-failover 'group': the bucket returned by
 * group_first_live_bucket() is translated and credited with statistics.
 * Nothing happens when no bucket is returned. */
static void
xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
{
    struct ofputil_bucket *live = group_first_live_bucket(ctx, group, 0);

    if (!live) {
        return;
    }

    xlate_group_bucket(ctx, live);
    xlate_group_stats(ctx, group, live);
}
|
|
|
|
|
|
2013-10-30 18:17:19 +09:00
|
|
|
|
static void
|
2015-03-20 13:50:32 +09:00
|
|
|
|
xlate_default_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
|
2013-10-30 18:17:19 +09:00
|
|
|
|
{
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2014-05-22 10:47:13 +00:00
|
|
|
|
struct ofputil_bucket *bucket;
|
2013-10-30 18:17:19 +09:00
|
|
|
|
uint32_t basis;
|
|
|
|
|
|
2014-08-15 09:42:46 -07:00
|
|
|
|
basis = flow_hash_symmetric_l4(&ctx->xin->flow, 0);
|
2015-03-19 15:39:48 -07:00
|
|
|
|
flow_mask_hash_fields(&ctx->xin->flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
|
2013-10-30 18:17:19 +09:00
|
|
|
|
bucket = group_best_live_bucket(ctx, group, basis);
|
|
|
|
|
if (bucket) {
|
|
|
|
|
xlate_group_bucket(ctx, bucket);
|
2014-05-22 10:47:13 +00:00
|
|
|
|
xlate_group_stats(ctx, group, bucket);
|
2013-10-30 18:17:19 +09:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-20 13:50:34 +09:00
|
|
|
|
static void
|
|
|
|
|
xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
|
|
|
|
|
{
|
|
|
|
|
struct mf_bitmap hash_fields = MF_BITMAP_INITIALIZER;
|
|
|
|
|
const struct field_array *fields;
|
|
|
|
|
struct ofputil_bucket *bucket;
|
|
|
|
|
uint32_t basis;
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
fields = group_dpif_get_fields(group);
|
|
|
|
|
basis = hash_uint64(group_dpif_get_selection_method_param(group));
|
|
|
|
|
|
|
|
|
|
/* Determine which fields to hash */
|
|
|
|
|
for (i = 0; i < MFF_N_IDS; i++) {
|
|
|
|
|
if (bitmap_is_set(fields->used.bm, i)) {
|
|
|
|
|
const struct mf_field *mf;
|
|
|
|
|
|
|
|
|
|
/* If the field is already present in 'hash_fields' then
|
|
|
|
|
* this loop has already checked that it and its pre-requisites
|
|
|
|
|
* are present in the flow and its pre-requisites have
|
|
|
|
|
* already been added to 'hash_fields'. There is nothing more
|
|
|
|
|
* to do here and as an optimisation the loop can continue. */
|
|
|
|
|
if (bitmap_is_set(hash_fields.bm, i)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mf = mf_from_id(i);
|
|
|
|
|
|
|
|
|
|
/* Only hash a field if it and its pre-requisites are present
|
|
|
|
|
* in the flow. */
|
|
|
|
|
if (!mf_are_prereqs_ok(mf, &ctx->xin->flow)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Hash both the field and its pre-requisites */
|
|
|
|
|
mf_bitmap_set_field_and_prereqs(mf, &hash_fields);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Hash the fields */
|
|
|
|
|
for (i = 0; i < MFF_N_IDS; i++) {
|
|
|
|
|
if (bitmap_is_set(hash_fields.bm, i)) {
|
|
|
|
|
const struct mf_field *mf = mf_from_id(i);
|
|
|
|
|
union mf_value value;
|
|
|
|
|
int j;
|
|
|
|
|
|
|
|
|
|
mf_get_value(mf, &ctx->xin->flow, &value);
|
|
|
|
|
/* This seems inefficient but so does apply_mask() */
|
|
|
|
|
for (j = 0; j < mf->n_bytes; j++) {
|
|
|
|
|
((uint8_t *) &value)[j] &= ((uint8_t *) &fields->value[i])[j];
|
|
|
|
|
}
|
|
|
|
|
basis = hash_bytes(&value, mf->n_bytes, basis);
|
|
|
|
|
|
2015-08-11 18:41:37 -07:00
|
|
|
|
/* For tunnels, hash in whether the field is present. */
|
|
|
|
|
if (mf_is_tun_metadata(mf)) {
|
|
|
|
|
basis = hash_boolean(mf_is_set(mf, &ctx->xin->flow), basis);
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
mf_mask_field(mf, &ctx->wc->masks);
|
2015-03-20 13:50:34 +09:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bucket = group_best_live_bucket(ctx, group, basis);
|
|
|
|
|
if (bucket) {
|
|
|
|
|
xlate_group_bucket(ctx, bucket);
|
|
|
|
|
xlate_group_stats(ctx, group, bucket);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-20 13:50:32 +09:00
|
|
|
|
static void
|
|
|
|
|
xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
|
|
|
|
|
{
|
|
|
|
|
const char *selection_method = group_dpif_get_selection_method(group);
|
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
/* Select groups may access flow keys beyond L2 in order to
|
|
|
|
|
* select a bucket. Recirculate as appropriate to make this possible.
|
|
|
|
|
*/
|
|
|
|
|
if (ctx->was_mpls) {
|
|
|
|
|
ctx_trigger_freeze(ctx);
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-20 13:50:32 +09:00
|
|
|
|
if (selection_method[0] == '\0') {
|
|
|
|
|
xlate_default_select_group(ctx, group);
|
2015-03-20 13:50:34 +09:00
|
|
|
|
} else if (!strcasecmp("hash", selection_method)) {
|
|
|
|
|
xlate_hash_fields_select_group(ctx, group);
|
2015-03-20 13:50:32 +09:00
|
|
|
|
} else {
|
|
|
|
|
/* Parsing of groups should ensure this never happens */
|
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-30 18:17:14 +09:00
|
|
|
|
static void
|
|
|
|
|
xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
|
|
|
|
|
{
|
2015-06-22 15:15:40 -07:00
|
|
|
|
bool was_in_group = ctx->in_group;
|
2014-02-21 16:27:00 -08:00
|
|
|
|
ctx->in_group = true;
|
|
|
|
|
|
2013-10-30 18:17:14 +09:00
|
|
|
|
switch (group_dpif_get_type(group)) {
|
|
|
|
|
case OFPGT11_ALL:
|
|
|
|
|
case OFPGT11_INDIRECT:
|
|
|
|
|
xlate_all_group(ctx, group);
|
|
|
|
|
break;
|
|
|
|
|
case OFPGT11_SELECT:
|
2013-10-30 18:17:19 +09:00
|
|
|
|
xlate_select_group(ctx, group);
|
2013-10-30 18:17:14 +09:00
|
|
|
|
break;
|
2013-10-30 18:17:18 +09:00
|
|
|
|
case OFPGT11_FF:
|
|
|
|
|
xlate_ff_group(ctx, group);
|
|
|
|
|
break;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2013-10-30 18:17:14 +09:00
|
|
|
|
}
|
2014-05-22 08:12:02 +00:00
|
|
|
|
group_dpif_unref(group);
|
2014-02-21 16:27:00 -08:00
|
|
|
|
|
2015-06-22 15:15:40 -07:00
|
|
|
|
ctx->in_group = was_in_group;
|
2013-10-30 18:17:14 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
|
|
|
|
|
{
|
2015-06-22 15:15:40 -07:00
|
|
|
|
if (xlate_resubmit_resource_check(ctx)) {
|
2013-10-30 18:17:14 +09:00
|
|
|
|
struct group_dpif *group;
|
|
|
|
|
bool got_group;
|
|
|
|
|
|
|
|
|
|
got_group = group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group);
|
|
|
|
|
if (got_group) {
|
|
|
|
|
xlate_group_action__(ctx, group);
|
|
|
|
|
} else {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_ofpact_resubmit(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_resubmit *resubmit)
|
|
|
|
|
{
|
2013-06-19 16:58:44 -07:00
|
|
|
|
ofp_port_t in_port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint8_t table_id;
|
2014-03-05 15:27:31 -08:00
|
|
|
|
bool may_packet_in = false;
|
|
|
|
|
bool honor_table_miss = false;
|
|
|
|
|
|
|
|
|
|
if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
|
|
|
|
|
/* Still allow missed packets to be sent to the controller
|
|
|
|
|
* if resubmitting from an internal table. */
|
|
|
|
|
may_packet_in = true;
|
|
|
|
|
honor_table_miss = true;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
in_port = resubmit->in_port;
|
|
|
|
|
if (in_port == OFPP_IN_PORT) {
|
2013-06-19 16:58:44 -07:00
|
|
|
|
in_port = ctx->xin->flow.in_port.ofp_port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
table_id = resubmit->table_id;
|
|
|
|
|
if (table_id == 255) {
|
|
|
|
|
table_id = ctx->table_id;
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-05 15:27:31 -08:00
|
|
|
|
xlate_table_action(ctx, in_port, table_id, may_packet_in,
|
|
|
|
|
honor_table_miss);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
flood_packets(struct xlate_ctx *ctx, bool all)
|
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct xport *xport;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
|
|
|
|
|
if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (all) {
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action__(ctx, xport->ofp_port, NULL, false);
|
2013-06-13 18:38:24 -07:00
|
|
|
|
} else if (!(xport->config & OFPUTIL_PC_NO_FLOOD)) {
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(ctx, xport->ofp_port, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface = NF_OUT_FLOOD;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
execute_controller_action(struct xlate_ctx *ctx, int len,
|
|
|
|
|
enum ofp_packet_in_reason reason,
|
2016-02-19 15:53:26 -08:00
|
|
|
|
uint16_t controller_id,
|
|
|
|
|
const uint8_t *userdata, size_t userdata_len)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2016-05-17 17:32:33 -07:00
|
|
|
|
struct dp_packet_batch batch;
|
2015-02-25 12:01:53 -08:00
|
|
|
|
struct dp_packet *packet;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-09-20 12:54:51 -07:00
|
|
|
|
ctx->xout->slow |= SLOW_CONTROLLER;
|
2015-12-04 14:04:26 -08:00
|
|
|
|
xlate_commit_actions(ctx);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (!ctx->xin->packet) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-22 03:21:09 -08:00
|
|
|
|
packet = dp_packet_clone(ctx->xin->packet);
|
2016-05-17 17:32:33 -07:00
|
|
|
|
packet_batch_init_packet(&batch, packet);
|
|
|
|
|
odp_execute_actions(NULL, &batch, false,
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ctx->odp_actions->data, ctx->odp_actions->size, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
openflow: Better abstract handling of packet-in messages.
Packet-in messages have been a bit of a mess. First, their abstraction
in the form of struct ofputil_packet_in has some fields that are used
in a clear way for incoming and outgoing packet-ins, and others
(packet_len, total_len, buffer_id) have confusing meanings or
usage pattern depending on their direction.
Second, it's very confusing how a packet-in has both a reason (OFPR_*)
and a miss type (OFPROTO_PACKET_IN_*) and how those add up to the
actual reason that is used "on the wire" for each OpenFlow version (and
even whether the packet-in is sent at all!).
Finally, there's all kind of low-level detail randomly scattered between
connmgr, ofproto-dpif-xlate, and ofp-util.
This commit attempts to clear up some of the confusion. It simplifies
the struct ofputil_packet_in abstraction by removing the members that
didn't have a clear and consistent meaning between incoming and outgoing
packet-ins. It gets rid of OFPROTO_PACKET_IN_*, instead adding a couple
of nonstandard OFPR_* reasons that add up to what OFPROTO_PACKET_IN_*
was meant to say (in what I hope is a clearer way). And it consolidates
the tricky parts into ofp-util, where I hope it will be easier to
understand all in one place.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-20 09:57:16 -08:00
|
|
|
|
/* A packet sent by an action in a table-miss rule is considered an
|
|
|
|
|
* explicit table miss. OpenFlow before 1.3 doesn't have that concept so
|
|
|
|
|
* it will get translated back to OFPR_ACTION for those versions. */
|
|
|
|
|
if (reason == OFPR_ACTION
|
|
|
|
|
&& ctx->rule && rule_dpif_is_table_miss(ctx->rule)) {
|
|
|
|
|
reason = OFPR_EXPLICIT_MISS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t packet_len = dp_packet_size(packet);
|
2013-10-22 16:16:31 -07:00
|
|
|
|
|
2016-01-18 20:12:30 -08:00
|
|
|
|
struct ofproto_async_msg *am = xmalloc(sizeof *am);
|
|
|
|
|
*am = (struct ofproto_async_msg) {
|
openflow: Better abstract handling of packet-in messages.
Packet-in messages have been a bit of a mess. First, their abstraction
in the form of struct ofputil_packet_in has some fields that are used
in a clear way for incoming and outgoing packet-ins, and others
(packet_len, total_len, buffer_id) have confusing meanings or
usage pattern depending on their direction.
Second, it's very confusing how a packet-in has both a reason (OFPR_*)
and a miss type (OFPROTO_PACKET_IN_*) and how those add up to the
actual reason that is used "on the wire" for each OpenFlow version (and
even whether the packet-in is sent at all!).
Finally, there's all kind of low-level detail randomly scattered between
connmgr, ofproto-dpif-xlate, and ofp-util.
This commit attempts to clear up some of the confusion. It simplifies
the struct ofputil_packet_in abstraction by removing the members that
didn't have a clear and consistent meaning between incoming and outgoing
packet-ins. It gets rid of OFPROTO_PACKET_IN_*, instead adding a couple
of nonstandard OFPR_* reasons that add up to what OFPROTO_PACKET_IN_*
was meant to say (in what I hope is a clearer way). And it consolidates
the tricky parts into ofp-util, where I hope it will be easier to
understand all in one place.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-20 09:57:16 -08:00
|
|
|
|
.controller_id = controller_id,
|
2016-01-18 20:12:30 -08:00
|
|
|
|
.oam = OAM_PACKET_IN,
|
|
|
|
|
.pin = {
|
|
|
|
|
.up = {
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
.public = {
|
|
|
|
|
.packet = dp_packet_steal_data(packet),
|
|
|
|
|
.packet_len = packet_len,
|
|
|
|
|
.reason = reason,
|
|
|
|
|
.table_id = ctx->table_id,
|
|
|
|
|
.cookie = ctx->rule_cookie,
|
|
|
|
|
.userdata = (userdata_len
|
|
|
|
|
? xmemdup(userdata, userdata_len)
|
|
|
|
|
: NULL),
|
|
|
|
|
.userdata_len = userdata_len,
|
|
|
|
|
}
|
2016-01-18 20:12:30 -08:00
|
|
|
|
},
|
|
|
|
|
.max_len = len,
|
openflow: Better abstract handling of packet-in messages.
Packet-in messages have been a bit of a mess. First, their abstraction
in the form of struct ofputil_packet_in has some fields that are used
in a clear way for incoming and outgoing packet-ins, and others
(packet_len, total_len, buffer_id) have confusing meanings or
usage pattern depending on their direction.
Second, it's very confusing how a packet-in has both a reason (OFPR_*)
and a miss type (OFPROTO_PACKET_IN_*) and how those add up to the
actual reason that is used "on the wire" for each OpenFlow version (and
even whether the packet-in is sent at all!).
Finally, there's all kind of low-level detail randomly scattered between
connmgr, ofproto-dpif-xlate, and ofp-util.
This commit attempts to clear up some of the confusion. It simplifies
the struct ofputil_packet_in abstraction by removing the members that
didn't have a clear and consistent meaning between incoming and outgoing
packet-ins. It gets rid of OFPROTO_PACKET_IN_*, instead adding a couple
of nonstandard OFPR_* reasons that add up to what OFPROTO_PACKET_IN_*
was meant to say (in what I hope is a clearer way). And it consolidates
the tricky parts into ofp-util, where I hope it will be easier to
understand all in one place.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-20 09:57:16 -08:00
|
|
|
|
},
|
|
|
|
|
};
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2016-01-18 20:12:30 -08:00
|
|
|
|
ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
|
2015-02-25 12:01:53 -08:00
|
|
|
|
dp_packet_delete(packet);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-06-24 08:46:31 +09:00
|
|
|
|
static void
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
|
2014-06-24 08:46:31 +09:00
|
|
|
|
{
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
struct ofproto_async_msg *am = xmalloc(sizeof *am);
|
|
|
|
|
*am = (struct ofproto_async_msg) {
|
|
|
|
|
.controller_id = ctx->pause->controller_id,
|
|
|
|
|
.oam = OAM_PACKET_IN,
|
|
|
|
|
.pin = {
|
|
|
|
|
.up = {
|
|
|
|
|
.public = {
|
|
|
|
|
.userdata = xmemdup(ctx->pause->userdata,
|
|
|
|
|
ctx->pause->userdata_len),
|
|
|
|
|
.userdata_len = ctx->pause->userdata_len,
|
|
|
|
|
.packet = xmemdup(dp_packet_data(ctx->xin->packet),
|
|
|
|
|
dp_packet_size(ctx->xin->packet)),
|
|
|
|
|
.packet_len = dp_packet_size(ctx->xin->packet),
|
2016-03-09 21:05:03 +05:30
|
|
|
|
.reason = ctx->pause->reason,
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
},
|
|
|
|
|
.bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
|
|
|
|
|
.stack = xmemdup(state->stack,
|
|
|
|
|
state->n_stack * sizeof *state->stack),
|
|
|
|
|
.n_stack = state->n_stack,
|
|
|
|
|
.mirrors = state->mirrors,
|
|
|
|
|
.conntracked = state->conntracked,
|
|
|
|
|
.actions = xmemdup(state->ofpacts, state->ofpacts_len),
|
|
|
|
|
.actions_len = state->ofpacts_len,
|
|
|
|
|
.action_set = xmemdup(state->action_set,
|
|
|
|
|
state->action_set_len),
|
|
|
|
|
.action_set_len = state->action_set_len,
|
|
|
|
|
},
|
|
|
|
|
.max_len = UINT16_MAX,
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
|
|
|
|
|
ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
|
|
|
|
|
}
|
2014-06-24 08:46:31 +09:00
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
static void
|
|
|
|
|
finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
|
|
|
|
|
{
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ovs_assert(ctx->freezing);
|
2014-06-24 08:46:31 +09:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
struct frozen_state state = {
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initally:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
.table_id = table,
|
2016-01-18 14:47:40 -08:00
|
|
|
|
.ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
|
2016-01-20 16:47:14 -08:00
|
|
|
|
.stack = ctx->stack.data,
|
|
|
|
|
.n_stack = ctx->stack.size / sizeof(union mf_subvalue),
|
2015-07-29 22:13:26 -07:00
|
|
|
|
.mirrors = ctx->mirrors,
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initally:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
.conntracked = ctx->conntracked,
|
2016-02-16 10:51:58 -08:00
|
|
|
|
.ofpacts = ctx->frozen_actions.data,
|
|
|
|
|
.ofpacts_len = ctx->frozen_actions.size,
|
2016-01-18 14:43:01 -08:00
|
|
|
|
.action_set = ctx->action_set.data,
|
2016-01-28 17:11:19 -08:00
|
|
|
|
.action_set_len = ctx->action_set.size,
|
2015-07-29 20:32:12 -07:00
|
|
|
|
};
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
|
2015-07-29 20:32:12 -07:00
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
if (ctx->pause) {
|
|
|
|
|
if (ctx->xin->packet) {
|
|
|
|
|
emit_continuation(ctx, &state);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* Allocate a unique recirc id for the given metadata state in the
|
|
|
|
|
* flow. An existing id, with a new reference to the corresponding
|
|
|
|
|
* recirculation context, will be returned if possible.
|
|
|
|
|
* The life-cycle of this recirc id is managed by associating it
|
|
|
|
|
* with the udpif key ('ukey') created for each new datapath flow. */
|
|
|
|
|
uint32_t id = recirc_alloc_id_ctx(&state);
|
|
|
|
|
if (!id) {
|
|
|
|
|
XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
|
|
|
|
|
ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
recirc_refs_add(&ctx->xout->recircs, id);
|
2014-06-24 08:46:31 +09:00
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
|
|
|
|
|
}
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Undo changes done by freezing. */
|
|
|
|
|
ctx_cancel_freeze(ctx);
|
2014-06-24 08:46:31 +09:00
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Called only when we're freezing. */
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
/* Finishes a freeze in progress on 'ctx': first calls xlate_commit_actions()
 * on 'ctx', then completes the freeze through finish_freezing__(), passing
 * table 0 as the table recorded in the frozen state. */
static void
finish_freezing(struct xlate_ctx *ctx)
{
    const uint8_t table_id = 0;

    xlate_commit_actions(ctx);
    finish_freezing__(ctx, table_id);
}
|
|
|
|
|
|
2015-11-06 16:16:47 -08:00
|
|
|
|
/* Fork the pipeline here. The current packet will continue processing the
|
|
|
|
|
* current action list. A clone of the current packet will recirculate, skip
|
|
|
|
|
* the remainder of the current action list and asynchronously resume pipeline
|
|
|
|
|
* processing in 'table' with the current metadata and action set. */
|
|
|
|
|
static void
|
|
|
|
|
compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
|
|
|
|
|
{
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx->freezing = true;
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
finish_freezing__(ctx, table);
|
2015-11-06 16:16:47 -08:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
static void
|
|
|
|
|
compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2014-02-04 10:32:35 -08:00
|
|
|
|
int n;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
ovs_assert(eth_type_mpls(mpls->ethertype));
|
2013-09-27 06:55:19 +09:00
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
n = flow_count_mpls_labels(flow, ctx->wc);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (!n) {
|
2015-09-09 19:00:17 -07:00
|
|
|
|
xlate_commit_actions(ctx);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
} else if (n >= FLOW_MAX_MPLS_LABELS) {
|
|
|
|
|
if (ctx->xin->packet != NULL) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
|
2014-02-04 10:32:35 -08:00
|
|
|
|
"MPLS push action can't be performed as it would "
|
|
|
|
|
"have more MPLS LSEs than the %d supported.",
|
|
|
|
|
ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
|
2014-02-04 10:32:35 -08:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2013-09-27 06:55:19 +09:00
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
flow_push_mpls(flow, n, mpls->ethertype, ctx->wc);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
static void
|
2013-06-12 14:33:17 -07:00
|
|
|
|
compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2014-02-04 10:32:35 -08:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2015-07-31 13:15:52 -07:00
|
|
|
|
int n = flow_count_mpls_labels(flow, ctx->wc);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
|
2016-02-24 16:10:42 -08:00
|
|
|
|
if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
|
2016-05-25 10:34:31 +09:00
|
|
|
|
ctx->was_mpls = true;
|
2014-06-24 08:46:31 +09:00
|
|
|
|
}
|
|
|
|
|
} else if (n >= FLOW_MAX_MPLS_LABELS) {
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (ctx->xin->packet != NULL) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
|
2014-02-04 10:32:35 -08:00
|
|
|
|
"MPLS pop action can't be performed as it has "
|
|
|
|
|
"more MPLS LSEs than the %d supported.",
|
|
|
|
|
ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
|
|
|
|
|
}
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ofpbuf_clear(ctx->odp_actions);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
|
|
|
|
|
{
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
|
|
|
|
|
|
|
|
|
if (!is_ip_any(flow)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.nw_ttl = 0xff;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
if (flow->nw_ttl > 1) {
|
|
|
|
|
flow->nw_ttl--;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return false;
|
|
|
|
|
} else {
|
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < ids->n_controllers; i++) {
|
|
|
|
|
execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
|
2016-02-19 15:53:26 -08:00
|
|
|
|
ids->cnt_ids[i], NULL, 0);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Stop processing for current table. */
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
static void
|
2013-10-24 13:19:34 -07:00
|
|
|
|
compose_set_mpls_label_action(struct xlate_ctx *ctx, ovs_be32 label)
|
|
|
|
|
{
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (eth_type_mpls(ctx->xin->flow.dl_type)) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_LABEL_MASK);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
set_mpls_lse_label(&ctx->xin->flow.mpls_lse[0], label);
|
2013-10-24 13:19:34 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
static void
|
2013-10-24 13:19:34 -07:00
|
|
|
|
compose_set_mpls_tc_action(struct xlate_ctx *ctx, uint8_t tc)
|
|
|
|
|
{
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (eth_type_mpls(ctx->xin->flow.dl_type)) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TC_MASK);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
set_mpls_lse_tc(&ctx->xin->flow.mpls_lse[0], tc);
|
2013-10-24 13:19:34 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
static void
|
2013-06-12 14:33:17 -07:00
|
|
|
|
compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (eth_type_mpls(ctx->xin->flow.dl_type)) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse[0], ttl);
|
2013-09-27 06:55:19 +09:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2013-06-12 14:33:17 -07:00
|
|
|
|
compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2013-06-18 23:55:47 -07:00
|
|
|
|
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (eth_type_mpls(flow->dl_type)) {
|
2014-09-30 13:34:43 -07:00
|
|
|
|
uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
|
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
if (ttl > 1) {
|
|
|
|
|
ttl--;
|
|
|
|
|
set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
|
|
|
|
|
return false;
|
|
|
|
|
} else {
|
2016-02-19 15:53:26 -08:00
|
|
|
|
execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
|
|
|
|
|
NULL, 0);
|
2014-02-04 10:32:35 -08:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-09-30 13:34:43 -07:00
|
|
|
|
|
|
|
|
|
/* Stop processing for current table. */
|
|
|
|
|
return true;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_output_action(struct xlate_ctx *ctx,
|
2013-06-19 16:58:44 -07:00
|
|
|
|
ofp_port_t port, uint16_t max_len, bool may_packet_in)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface = NF_OUT_DROP;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
switch (port) {
|
|
|
|
|
case OFPP_IN_PORT:
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
case OFPP_TABLE:
|
2013-06-19 16:58:44 -07:00
|
|
|
|
xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
|
2014-03-20 13:42:22 -07:00
|
|
|
|
0, may_packet_in, true);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
case OFPP_NORMAL:
|
|
|
|
|
xlate_normal(ctx);
|
|
|
|
|
break;
|
|
|
|
|
case OFPP_FLOOD:
|
|
|
|
|
flood_packets(ctx, false);
|
|
|
|
|
break;
|
|
|
|
|
case OFPP_ALL:
|
|
|
|
|
flood_packets(ctx, true);
|
|
|
|
|
break;
|
|
|
|
|
case OFPP_CONTROLLER:
|
2014-11-25 16:32:15 -08:00
|
|
|
|
execute_controller_action(ctx, max_len,
|
2015-02-03 22:24:18 -08:00
|
|
|
|
(ctx->in_group ? OFPR_GROUP
|
|
|
|
|
: ctx->in_action_set ? OFPR_ACTION_SET
|
|
|
|
|
: OFPR_ACTION),
|
2016-02-19 15:53:26 -08:00
|
|
|
|
0, NULL, 0);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
case OFPP_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case OFPP_LOCAL:
|
|
|
|
|
default:
|
2013-06-19 16:58:44 -07:00
|
|
|
|
if (port != ctx->xin->flow.in_port.ofp_port) {
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(ctx, port, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
|
|
|
|
xlate_report(ctx, "skipping output to input port");
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (prev_nf_output_iface == NF_OUT_FLOOD) {
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface = NF_OUT_FLOOD;
|
|
|
|
|
} else if (ctx->nf_output_iface == NF_OUT_DROP) {
|
|
|
|
|
ctx->nf_output_iface = prev_nf_output_iface;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else if (prev_nf_output_iface != NF_OUT_DROP &&
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx->nf_output_iface != NF_OUT_FLOOD) {
|
|
|
|
|
ctx->nf_output_iface = NF_OUT_MULTI;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_output_reg_action(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_output_reg *or)
|
|
|
|
|
{
|
|
|
|
|
uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow);
|
|
|
|
|
if (port <= UINT16_MAX) {
|
|
|
|
|
union mf_subvalue value;
|
|
|
|
|
|
|
|
|
|
memset(&value, 0xff, sizeof value);
|
2015-07-31 13:15:52 -07:00
|
|
|
|
mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks);
|
2013-06-19 16:58:44 -07:00
|
|
|
|
xlate_output_action(ctx, u16_to_ofp(port),
|
|
|
|
|
or->max_len, false);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_enqueue_action(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_enqueue *enqueue)
|
|
|
|
|
{
|
2013-06-19 16:58:44 -07:00
|
|
|
|
ofp_port_t ofp_port = enqueue->port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
uint32_t queue_id = enqueue->queue;
|
|
|
|
|
uint32_t flow_priority, priority;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
/* Translate queue to priority. */
|
2013-07-06 11:46:48 -07:00
|
|
|
|
error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (error) {
|
|
|
|
|
/* Fall back to ordinary output action. */
|
|
|
|
|
xlate_output_action(ctx, enqueue->port, 0, false);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check output port. */
|
|
|
|
|
if (ofp_port == OFPP_IN_PORT) {
|
2013-06-19 16:58:44 -07:00
|
|
|
|
ofp_port = ctx->xin->flow.in_port.ofp_port;
|
|
|
|
|
} else if (ofp_port == ctx->xin->flow.in_port.ofp_port) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Add datapath actions. */
|
|
|
|
|
flow_priority = ctx->xin->flow.skb_priority;
|
|
|
|
|
ctx->xin->flow.skb_priority = priority;
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(ctx, ofp_port, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
ctx->xin->flow.skb_priority = flow_priority;
|
|
|
|
|
|
|
|
|
|
/* Update NetFlow output port. */
|
2015-07-24 09:35:58 -07:00
|
|
|
|
if (ctx->nf_output_iface == NF_OUT_DROP) {
|
|
|
|
|
ctx->nf_output_iface = ofp_port;
|
|
|
|
|
} else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
|
|
|
|
|
ctx->nf_output_iface = NF_OUT_MULTI;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
|
|
|
|
|
{
|
|
|
|
|
uint32_t skb_priority;
|
|
|
|
|
|
2013-07-06 11:46:48 -07:00
|
|
|
|
if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
ctx->xin->flow.skb_priority = skb_priority;
|
|
|
|
|
} else {
|
|
|
|
|
/* Couldn't translate queue to a priority. Nothing to do. A warning
|
|
|
|
|
* has already been logged. */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2013-06-13 18:38:24 -07:00
|
|
|
|
slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
const struct xbridge *xbridge = xbridge_;
|
|
|
|
|
struct xport *port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
switch (ofp_port) {
|
|
|
|
|
case OFPP_IN_PORT:
|
|
|
|
|
case OFPP_TABLE:
|
|
|
|
|
case OFPP_NORMAL:
|
|
|
|
|
case OFPP_FLOOD:
|
|
|
|
|
case OFPP_ALL:
|
|
|
|
|
case OFPP_NONE:
|
|
|
|
|
return true;
|
|
|
|
|
case OFPP_CONTROLLER: /* Not supported by the bundle action. */
|
|
|
|
|
return false;
|
|
|
|
|
default:
|
2013-06-13 18:38:24 -07:00
|
|
|
|
port = get_ofp_port(xbridge, ofp_port);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return port ? port->may_enable : false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_bundle_action(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_bundle *bundle)
|
|
|
|
|
{
|
2013-06-19 16:58:44 -07:00
|
|
|
|
ofp_port_t port;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-31 13:15:52 -07:00
|
|
|
|
port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
|
2013-06-13 18:38:24 -07:00
|
|
|
|
CONST_CAST(struct xbridge *, ctx->xbridge));
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (bundle->dst.field) {
|
2015-07-31 13:15:52 -07:00
|
|
|
|
nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
} else {
|
|
|
|
|
xlate_output_action(ctx, port, 0, false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:-
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache is re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
xlate_learn_action__(struct xlate_ctx *ctx, const struct ofpact_learn *learn,
|
|
|
|
|
struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:-
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache is re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
learn_execute(learn, &ctx->xin->flow, fm, ofpacts);
|
|
|
|
|
if (ctx->xin->may_learn) {
|
|
|
|
|
ofproto_dpif_flow_mod(ctx->xbridge->ofproto, fm);
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache was re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
static void
|
|
|
|
|
xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
|
|
|
|
|
{
|
2015-07-31 13:15:52 -07:00
|
|
|
|
learn_mask(learn, ctx->wc);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_LEARN);
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:-
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache is re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
entry->u.learn.ofproto = ctx->xbridge->ofproto;
|
|
|
|
|
entry->u.learn.fm = xmalloc(sizeof *entry->u.learn.fm);
|
|
|
|
|
entry->u.learn.ofpacts = ofpbuf_new(64);
|
|
|
|
|
xlate_learn_action__(ctx, learn, entry->u.learn.fm,
|
|
|
|
|
entry->u.learn.ofpacts);
|
|
|
|
|
} else if (ctx->xin->may_learn) {
|
|
|
|
|
uint64_t ofpacts_stub[1024 / 8];
|
|
|
|
|
struct ofputil_flow_mod fm;
|
|
|
|
|
struct ofpbuf ofpacts;
|
|
|
|
|
|
|
|
|
|
ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
|
|
|
|
|
xlate_learn_action__(ctx, learn, &fm, &ofpacts);
|
|
|
|
|
ofpbuf_uninit(&ofpacts);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_fin_timeout__(struct rule_dpif *rule, uint16_t tcp_flags,
|
|
|
|
|
uint16_t idle_timeout, uint16_t hard_timeout)
|
|
|
|
|
{
|
|
|
|
|
if (tcp_flags & (TCP_FIN | TCP_RST)) {
|
|
|
|
|
rule_dpif_reduce_timeouts(rule, idle_timeout, hard_timeout);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_fin_timeout(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_fin_timeout *oft)
|
|
|
|
|
{
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx->rule) {
|
|
|
|
|
xlate_fin_timeout__(ctx->rule, ctx->xin->tcp_flags,
|
|
|
|
|
oft->fin_idle_timeout, oft->fin_hard_timeout);
|
|
|
|
|
if (ctx->xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx->xin->xcache, XC_FIN_TIMEOUT);
|
2014-04-24 08:21:49 -07:00
|
|
|
|
/* XC_RULE already holds a reference on the rule, none is taken
|
|
|
|
|
* here. */
|
2014-04-10 16:00:28 +12:00
|
|
|
|
entry->u.fin.rule = ctx->rule;
|
|
|
|
|
entry->u.fin.idle = oft->fin_idle_timeout;
|
|
|
|
|
entry->u.fin.hard = oft->fin_hard_timeout;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_sample_action(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact_sample *os)
|
|
|
|
|
{
|
2015-02-24 15:33:59 -08:00
|
|
|
|
/* Scale the probability from 16-bit to 32-bit while representing
|
|
|
|
|
* the same percentage. */
|
|
|
|
|
uint32_t probability = (os->probability << 16) | os->probability;
|
|
|
|
|
|
2015-02-24 16:40:42 -08:00
|
|
|
|
if (!ctx->xbridge->support.variable_length_userdata) {
|
2015-02-24 15:33:59 -08:00
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
|
|
|
|
|
VLOG_ERR_RL(&rl, "ignoring NXAST_SAMPLE action because datapath "
|
|
|
|
|
"lacks support (needs Linux 3.10+ or kernel module from "
|
|
|
|
|
"OVS 1.11+)");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-09 19:00:17 -07:00
|
|
|
|
xlate_commit_actions(ctx);
|
2015-02-24 15:33:59 -08:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
union user_action_cookie cookie = {
|
|
|
|
|
.flow_sample = {
|
|
|
|
|
.type = USER_ACTION_COOKIE_FLOW_SAMPLE,
|
|
|
|
|
.probability = os->probability,
|
|
|
|
|
.collector_set_id = os->collector_set_id,
|
|
|
|
|
.obs_domain_id = os->obs_domain_id,
|
|
|
|
|
.obs_point_id = os->obs_point_id,
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
compose_sample_action(ctx, probability, &cookie, sizeof cookie.flow_sample,
|
|
|
|
|
ODPP_NONE, false);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2013-06-13 18:38:24 -07:00
|
|
|
|
may_receive(const struct xport *xport, struct xlate_ctx *ctx)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2014-04-24 13:18:18 -07:00
|
|
|
|
if (xport->config & (is_stp(&ctx->xin->flow)
|
2013-06-13 18:38:24 -07:00
|
|
|
|
? OFPUTIL_PC_NO_RECV_STP
|
|
|
|
|
: OFPUTIL_PC_NO_RECV)) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Only drop packets here if both forwarding and learning are
|
|
|
|
|
* disabled. If just learning is enabled, we need to have
|
|
|
|
|
* OFPP_NORMAL and the learning action have a look at the packet
|
|
|
|
|
* before we can drop it. */
|
2014-08-22 09:01:34 -07:00
|
|
|
|
if ((!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) ||
|
|
|
|
|
(!xport_rstp_forward_state(xport) && !xport_rstp_learn_state(xport))) {
|
2013-06-11 13:32:30 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-11 13:23:29 +09:00
|
|
|
|
static void
|
2016-01-15 13:56:34 -08:00
|
|
|
|
xlate_write_actions__(struct xlate_ctx *ctx,
|
|
|
|
|
const struct ofpact *ofpacts, size_t ofpacts_len)
|
2013-10-11 13:23:29 +09:00
|
|
|
|
{
|
2014-11-03 14:24:01 -08:00
|
|
|
|
/* Maintain actset_output depending on the contents of the action set:
|
|
|
|
|
*
|
|
|
|
|
* - OFPP_UNSET, if there is no "output" action.
|
|
|
|
|
*
|
|
|
|
|
* - The output port, if there is an "output" action and no "group"
|
|
|
|
|
* action.
|
|
|
|
|
*
|
|
|
|
|
* - OFPP_UNSET, if there is a "group" action.
|
|
|
|
|
*/
|
|
|
|
|
if (!ctx->action_set_has_group) {
|
2016-01-15 13:56:34 -08:00
|
|
|
|
const struct ofpact *a;
|
|
|
|
|
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
|
|
|
|
|
if (a->type == OFPACT_OUTPUT) {
|
|
|
|
|
ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port;
|
|
|
|
|
} else if (a->type == OFPACT_GROUP) {
|
2014-11-03 14:24:01 -08:00
|
|
|
|
ctx->xin->flow.actset_output = OFPP_UNSET;
|
|
|
|
|
ctx->action_set_has_group = true;
|
2015-07-23 15:45:05 -07:00
|
|
|
|
break;
|
2014-11-03 14:24:01 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-01-15 13:56:34 -08:00
|
|
|
|
ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a)
|
|
|
|
|
{
|
|
|
|
|
xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a));
|
2013-10-11 13:23:29 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_action_set(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
uint64_t action_list_stub[1024 / 64];
|
|
|
|
|
struct ofpbuf action_list;
|
|
|
|
|
|
2015-02-03 22:24:18 -08:00
|
|
|
|
ctx->in_action_set = true;
|
2013-10-11 13:23:29 +09:00
|
|
|
|
ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
|
|
|
|
|
ofpacts_execute_action_set(&action_list, &ctx->action_set);
|
2015-03-11 18:01:51 -07:00
|
|
|
|
/* Clear the action set, as it is not needed any more. */
|
|
|
|
|
ofpbuf_clear(&ctx->action_set);
|
2015-03-02 17:29:44 -08:00
|
|
|
|
do_xlate_actions(action_list.data, action_list.size, ctx);
|
2015-02-03 22:24:18 -08:00
|
|
|
|
ctx->in_action_set = false;
|
2013-10-11 13:23:29 +09:00
|
|
|
|
ofpbuf_uninit(&action_list);
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
static void
|
2016-02-16 10:51:58 -08:00
|
|
|
|
freeze_put_unroll_xlate(struct xlate_ctx *ctx)
|
2015-03-26 11:18:16 -07:00
|
|
|
|
{
|
2016-02-16 10:51:58 -08:00
|
|
|
|
struct ofpact_unroll_xlate *unroll = ctx->frozen_actions.header;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
|
|
|
|
/* Restore the table_id and rule cookie for a potential PACKET
|
|
|
|
|
* IN if needed. */
|
|
|
|
|
if (!unroll ||
|
|
|
|
|
(ctx->table_id != unroll->rule_table_id
|
|
|
|
|
|| ctx->rule_cookie != unroll->rule_cookie)) {
|
2016-02-16 10:51:58 -08:00
|
|
|
|
unroll = ofpact_put_UNROLL_XLATE(&ctx->frozen_actions);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
unroll->rule_table_id = ctx->table_id;
|
|
|
|
|
unroll->rule_cookie = ctx->rule_cookie;
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx->frozen_actions.header = unroll;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Copy actions 'a' through 'end' to ctx->frozen_actions, which will be
|
|
|
|
|
* executed after thawing. Inserts an UNROLL_XLATE action, if none is already
|
|
|
|
|
* present, before any action that may depend on the current table ID or flow
|
|
|
|
|
* cookie. */
|
2015-03-26 11:18:16 -07:00
|
|
|
|
static void
|
2016-02-16 10:51:58 -08:00
|
|
|
|
freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end,
|
2015-03-26 11:18:16 -07:00
|
|
|
|
struct xlate_ctx *ctx)
|
|
|
|
|
{
|
2016-01-28 16:27:32 -08:00
|
|
|
|
for (; a < end; a = ofpact_next(a)) {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
switch (a->type) {
|
|
|
|
|
case OFPACT_OUTPUT_REG:
|
|
|
|
|
case OFPACT_GROUP:
|
|
|
|
|
case OFPACT_OUTPUT:
|
|
|
|
|
case OFPACT_CONTROLLER:
|
|
|
|
|
case OFPACT_DEC_MPLS_TTL:
|
|
|
|
|
case OFPACT_DEC_TTL:
|
2016-01-22 15:58:55 -08:00
|
|
|
|
/* These actions may generate asynchronous messages, which include
|
|
|
|
|
* table ID and flow cookie information. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
freeze_put_unroll_xlate(ctx);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2016-01-22 15:58:55 -08:00
|
|
|
|
case OFPACT_RESUBMIT:
|
|
|
|
|
if (ofpact_get_RESUBMIT(a)->table_id == 0xff) {
|
|
|
|
|
/* This resubmit action is relative to the current table, so we
|
|
|
|
|
* need to track what table that is.*/
|
2016-02-16 10:51:58 -08:00
|
|
|
|
freeze_put_unroll_xlate(ctx);
|
2016-01-22 15:58:55 -08:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
case OFPACT_SET_TUNNEL:
|
|
|
|
|
case OFPACT_REG_MOVE:
|
|
|
|
|
case OFPACT_SET_FIELD:
|
|
|
|
|
case OFPACT_STACK_PUSH:
|
|
|
|
|
case OFPACT_STACK_POP:
|
|
|
|
|
case OFPACT_LEARN:
|
|
|
|
|
case OFPACT_WRITE_METADATA:
|
2016-01-22 15:58:55 -08:00
|
|
|
|
case OFPACT_GOTO_TABLE:
|
2015-03-26 11:18:16 -07:00
|
|
|
|
case OFPACT_ENQUEUE:
|
|
|
|
|
case OFPACT_SET_VLAN_VID:
|
|
|
|
|
case OFPACT_SET_VLAN_PCP:
|
|
|
|
|
case OFPACT_STRIP_VLAN:
|
|
|
|
|
case OFPACT_PUSH_VLAN:
|
|
|
|
|
case OFPACT_SET_ETH_SRC:
|
|
|
|
|
case OFPACT_SET_ETH_DST:
|
|
|
|
|
case OFPACT_SET_IPV4_SRC:
|
|
|
|
|
case OFPACT_SET_IPV4_DST:
|
|
|
|
|
case OFPACT_SET_IP_DSCP:
|
|
|
|
|
case OFPACT_SET_IP_ECN:
|
|
|
|
|
case OFPACT_SET_IP_TTL:
|
|
|
|
|
case OFPACT_SET_L4_SRC_PORT:
|
|
|
|
|
case OFPACT_SET_L4_DST_PORT:
|
|
|
|
|
case OFPACT_SET_QUEUE:
|
|
|
|
|
case OFPACT_POP_QUEUE:
|
|
|
|
|
case OFPACT_PUSH_MPLS:
|
|
|
|
|
case OFPACT_POP_MPLS:
|
|
|
|
|
case OFPACT_SET_MPLS_LABEL:
|
|
|
|
|
case OFPACT_SET_MPLS_TC:
|
|
|
|
|
case OFPACT_SET_MPLS_TTL:
|
|
|
|
|
case OFPACT_MULTIPATH:
|
|
|
|
|
case OFPACT_BUNDLE:
|
|
|
|
|
case OFPACT_EXIT:
|
|
|
|
|
case OFPACT_UNROLL_XLATE:
|
|
|
|
|
case OFPACT_FIN_TIMEOUT:
|
|
|
|
|
case OFPACT_CLEAR_ACTIONS:
|
|
|
|
|
case OFPACT_WRITE_ACTIONS:
|
|
|
|
|
case OFPACT_METER:
|
|
|
|
|
case OFPACT_SAMPLE:
|
2015-07-29 22:02:41 -07:00
|
|
|
|
case OFPACT_DEBUG_RECIRC:
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initally:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
case OFPACT_CT:
|
2015-11-24 15:47:56 -08:00
|
|
|
|
case OFPACT_NAT:
|
2016-01-22 15:58:55 -08:00
|
|
|
|
/* These may not generate PACKET INs. */
|
2015-03-26 11:18:16 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_NOTE:
|
|
|
|
|
case OFPACT_CONJUNCTION:
|
2016-01-22 15:58:55 -08:00
|
|
|
|
/* These need not be copied for restoration. */
|
2015-03-26 11:18:16 -07:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
/* Copy the action over. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ofpbuf_put(&ctx->frozen_actions, a, OFPACT_ALIGN(a->len));
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
static void
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
put_ct_mark(const struct flow *flow, struct ofpbuf *odp_actions,
|
|
|
|
|
struct flow_wildcards *wc)
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
{
|
2016-04-15 11:36:05 -07:00
|
|
|
|
if (wc->masks.ct_mark) {
|
|
|
|
|
struct {
|
|
|
|
|
uint32_t key;
|
|
|
|
|
uint32_t mask;
|
|
|
|
|
} *odp_ct_mark;
|
|
|
|
|
|
|
|
|
|
odp_ct_mark = nl_msg_put_unspec_uninit(odp_actions, OVS_CT_ATTR_MARK,
|
|
|
|
|
sizeof(*odp_ct_mark));
|
|
|
|
|
odp_ct_mark->key = flow->ct_mark & wc->masks.ct_mark;
|
|
|
|
|
odp_ct_mark->mask = wc->masks.ct_mark;
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
static void
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
put_ct_label(const struct flow *flow, struct ofpbuf *odp_actions,
|
|
|
|
|
struct flow_wildcards *wc)
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
{
|
2016-05-03 18:20:51 -07:00
|
|
|
|
if (!ovs_u128_is_zero(wc->masks.ct_label)) {
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
struct {
|
|
|
|
|
ovs_u128 key;
|
|
|
|
|
ovs_u128 mask;
|
|
|
|
|
} *odp_ct_label;
|
|
|
|
|
|
|
|
|
|
odp_ct_label = nl_msg_put_unspec_uninit(odp_actions,
|
|
|
|
|
OVS_CT_ATTR_LABELS,
|
|
|
|
|
sizeof(*odp_ct_label));
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
odp_ct_label->key = ovs_u128_and(flow->ct_label, wc->masks.ct_label);
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
odp_ct_label->mask = wc->masks.ct_label;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Add support for connection tracking helper/ALGs.
This patch adds support for specifying a "helper" or ALG to assist
connection tracking for protocols that consist of multiple streams.
Initially, only support for FTP is included.
Below is an example set of flows to allow FTP control connections from
port 1->2 to establish active data connections in the reverse direction:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(alg=ftp,commit),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(table=1)
table=1,in_port=2,tcp,ct_state=+trk+est,action=1
table=1,in_port=2,tcp,ct_state=+trk+rel,action=ct(commit),1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-15 14:29:16 -07:00
|
|
|
|
static void
|
|
|
|
|
put_ct_helper(struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc)
|
|
|
|
|
{
|
|
|
|
|
if (ofc->alg) {
|
|
|
|
|
if (ofc->alg == IPPORT_FTP) {
|
|
|
|
|
nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "ftp");
|
|
|
|
|
} else {
|
|
|
|
|
VLOG_WARN("Cannot serialize ct_helper %d\n", ofc->alg);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-24 15:47:56 -08:00
|
|
|
|
static void
|
|
|
|
|
put_ct_nat(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
struct ofpact_nat *ofn = ctx->ct_nat_action;
|
|
|
|
|
size_t nat_offset;
|
|
|
|
|
|
|
|
|
|
if (!ofn) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nat_offset = nl_msg_start_nested(ctx->odp_actions, OVS_CT_ATTR_NAT);
|
|
|
|
|
if (ofn->flags & NX_NAT_F_SRC || ofn->flags & NX_NAT_F_DST) {
|
|
|
|
|
nl_msg_put_flag(ctx->odp_actions, ofn->flags & NX_NAT_F_SRC
|
|
|
|
|
? OVS_NAT_ATTR_SRC : OVS_NAT_ATTR_DST);
|
|
|
|
|
if (ofn->flags & NX_NAT_F_PERSISTENT) {
|
|
|
|
|
nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PERSISTENT);
|
|
|
|
|
}
|
|
|
|
|
if (ofn->flags & NX_NAT_F_PROTO_HASH) {
|
|
|
|
|
nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_HASH);
|
|
|
|
|
} else if (ofn->flags & NX_NAT_F_PROTO_RANDOM) {
|
|
|
|
|
nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_RANDOM);
|
|
|
|
|
}
|
|
|
|
|
if (ofn->range_af == AF_INET) {
|
2015-11-25 22:30:21 -08:00
|
|
|
|
nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
|
2015-11-24 15:47:56 -08:00
|
|
|
|
ofn->range.addr.ipv4.min);
|
|
|
|
|
if (ofn->range.addr.ipv4.max &&
|
2015-11-25 22:30:21 -08:00
|
|
|
|
(ntohl(ofn->range.addr.ipv4.max)
|
|
|
|
|
> ntohl(ofn->range.addr.ipv4.min))) {
|
|
|
|
|
nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
|
|
|
|
|
ofn->range.addr.ipv4.max);
|
2015-11-24 15:47:56 -08:00
|
|
|
|
}
|
|
|
|
|
} else if (ofn->range_af == AF_INET6) {
|
|
|
|
|
nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
|
|
|
|
|
&ofn->range.addr.ipv6.min,
|
|
|
|
|
sizeof ofn->range.addr.ipv6.min);
|
|
|
|
|
if (!ipv6_mask_is_any(&ofn->range.addr.ipv6.max) &&
|
|
|
|
|
memcmp(&ofn->range.addr.ipv6.max, &ofn->range.addr.ipv6.min,
|
|
|
|
|
sizeof ofn->range.addr.ipv6.max) > 0) {
|
|
|
|
|
nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
|
|
|
|
|
&ofn->range.addr.ipv6.max,
|
|
|
|
|
sizeof ofn->range.addr.ipv6.max);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
|
|
|
|
|
nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MIN,
|
|
|
|
|
ofn->range.proto.min);
|
|
|
|
|
if (ofn->range.proto.max &&
|
|
|
|
|
ofn->range.proto.max > ofn->range.proto.min) {
|
|
|
|
|
nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MAX,
|
|
|
|
|
ofn->range.proto.max);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
nl_msg_end_nested(ctx->odp_actions, nat_offset);
|
|
|
|
|
}
|
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
static void
|
|
|
|
|
compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
|
|
|
|
|
{
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
ovs_u128 old_ct_label = ctx->base_flow.ct_label;
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
uint32_t old_ct_mark = ctx->base_flow.ct_mark;
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
size_t ct_offset;
|
|
|
|
|
uint16_t zone;
|
|
|
|
|
|
|
|
|
|
/* Ensure that any prior actions are applied before composing the new
|
|
|
|
|
* conntrack action. */
|
|
|
|
|
xlate_commit_actions(ctx);
|
|
|
|
|
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
/* Process nested actions first, to populate the key. */
|
2015-11-24 15:47:56 -08:00
|
|
|
|
ctx->ct_nat_action = NULL;
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
ctx->wc->masks.ct_mark = 0;
|
|
|
|
|
ctx->wc->masks.ct_label.u64.hi = ctx->wc->masks.ct_label.u64.lo = 0;
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx);
|
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
if (ofc->zone_src.field) {
|
|
|
|
|
zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
|
|
|
|
|
} else {
|
|
|
|
|
zone = ofc->zone_imm;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
|
|
|
|
|
if (ofc->flags & NX_CT_F_COMMIT) {
|
|
|
|
|
nl_msg_put_flag(ctx->odp_actions, OVS_CT_ATTR_COMMIT);
|
|
|
|
|
}
|
|
|
|
|
nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
|
|
|
|
|
put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
|
Add support for connection tracking helper/ALGs.
This patch adds support for specifying a "helper" or ALG to assist
connection tracking for protocols that consist of multiple streams.
Initially, only support for FTP is included.
Below is an example set of flows to allow FTP control connections from
port 1->2 to establish active data connections in the reverse direction:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(alg=ftp,commit),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(table=1)
table=1,in_port=2,tcp,ct_state=+trk+est,action=1
table=1,in_port=2,tcp,ct_state=+trk+rel,action=ct(commit),1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-15 14:29:16 -07:00
|
|
|
|
put_ct_helper(ctx->odp_actions, ofc);
|
2015-11-24 15:47:56 -08:00
|
|
|
|
put_ct_nat(ctx);
|
|
|
|
|
ctx->ct_nat_action = NULL;
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
nl_msg_end_nested(ctx->odp_actions, ct_offset);
|
|
|
|
|
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
/* Restore the original ct fields in the key. These should only be exposed
|
|
|
|
|
* after recirculation to another table. */
|
|
|
|
|
ctx->base_flow.ct_mark = old_ct_mark;
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
ctx->wc->masks.ct_mark = old_ct_mark_mask;
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
|
ctx->base_flow.ct_label = old_ct_label;
|
ofproto-dpif-xlate: xlate ct_{mark, label} correctly.
When translating multiple ct actions in a row which include modification
of ct_mark or ct_labels, these fields could be incorrectly translated
into datapath actions, resulting in modification of these fields for
entries when the OpenFlow rules didn't actually specify the change.
For instance, the following OpenFlow actions:
ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
Would translate into the datapath actions:
ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
This commit fixes the issue by zeroing the wildcards for these fields
prior to performing nested actions translation (and restoring
afterwards). As such, these fields do not hold both the match and the
field modification values at the same time. As a result, the ct_mark and
ct_labels don't leak from one ct action to the next.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-15 11:36:04 -07:00
|
|
|
|
ctx->wc->masks.ct_label = old_ct_label_mask;
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
if (ofc->recirc_table == NX_CT_RECIRC_NONE) {
|
|
|
|
|
/* If we do not recirculate as part of this action, hide the results of
|
|
|
|
|
* connection tracking from subsequent recirculations. */
|
|
|
|
|
ctx->conntracked = false;
|
|
|
|
|
} else {
|
|
|
|
|
/* Use ct_* fields from datapath during recirculation upcall. */
|
|
|
|
|
ctx->conntracked = true;
|
2015-11-06 16:16:47 -08:00
|
|
|
|
compose_recirculate_and_fork(ctx, ofc->recirc_table);
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
static void
|
|
|
|
|
recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
/* No need to recirculate if already exiting. */
|
|
|
|
|
if (ctx->exit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Do not consider recirculating unless the packet was previously MPLS. */
|
|
|
|
|
if (!ctx->was_mpls) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Special case these actions, only recirculating if necessary.
|
|
|
|
|
* This avoids the overhead of recirculation in common use-cases.
|
|
|
|
|
*/
|
|
|
|
|
switch (a->type) {
|
|
|
|
|
|
|
|
|
|
/* Output actions do not require recirculation. */
|
|
|
|
|
case OFPACT_OUTPUT:
|
|
|
|
|
case OFPACT_ENQUEUE:
|
|
|
|
|
case OFPACT_OUTPUT_REG:
|
|
|
|
|
/* Set actions that don't touch L3+ fields do not require recirculation. */
|
|
|
|
|
case OFPACT_SET_VLAN_VID:
|
|
|
|
|
case OFPACT_SET_VLAN_PCP:
|
|
|
|
|
case OFPACT_SET_ETH_SRC:
|
|
|
|
|
case OFPACT_SET_ETH_DST:
|
|
|
|
|
case OFPACT_SET_TUNNEL:
|
|
|
|
|
case OFPACT_SET_QUEUE:
|
|
|
|
|
/* If actions of a group require recirculation that can be detected
|
|
|
|
|
* when translating them. */
|
|
|
|
|
case OFPACT_GROUP:
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Set field that don't touch L3+ fields don't require recirculation. */
|
|
|
|
|
case OFPACT_SET_FIELD:
|
|
|
|
|
if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* For simplicity, recirculate in all other cases. */
|
|
|
|
|
case OFPACT_CONTROLLER:
|
|
|
|
|
case OFPACT_BUNDLE:
|
|
|
|
|
case OFPACT_STRIP_VLAN:
|
|
|
|
|
case OFPACT_PUSH_VLAN:
|
|
|
|
|
case OFPACT_SET_IPV4_SRC:
|
|
|
|
|
case OFPACT_SET_IPV4_DST:
|
|
|
|
|
case OFPACT_SET_IP_DSCP:
|
|
|
|
|
case OFPACT_SET_IP_ECN:
|
|
|
|
|
case OFPACT_SET_IP_TTL:
|
|
|
|
|
case OFPACT_SET_L4_SRC_PORT:
|
|
|
|
|
case OFPACT_SET_L4_DST_PORT:
|
|
|
|
|
case OFPACT_REG_MOVE:
|
|
|
|
|
case OFPACT_STACK_PUSH:
|
|
|
|
|
case OFPACT_STACK_POP:
|
|
|
|
|
case OFPACT_DEC_TTL:
|
|
|
|
|
case OFPACT_SET_MPLS_LABEL:
|
|
|
|
|
case OFPACT_SET_MPLS_TC:
|
|
|
|
|
case OFPACT_SET_MPLS_TTL:
|
|
|
|
|
case OFPACT_DEC_MPLS_TTL:
|
|
|
|
|
case OFPACT_PUSH_MPLS:
|
|
|
|
|
case OFPACT_POP_MPLS:
|
|
|
|
|
case OFPACT_POP_QUEUE:
|
|
|
|
|
case OFPACT_FIN_TIMEOUT:
|
|
|
|
|
case OFPACT_RESUBMIT:
|
|
|
|
|
case OFPACT_LEARN:
|
|
|
|
|
case OFPACT_CONJUNCTION:
|
|
|
|
|
case OFPACT_MULTIPATH:
|
|
|
|
|
case OFPACT_NOTE:
|
|
|
|
|
case OFPACT_EXIT:
|
|
|
|
|
case OFPACT_SAMPLE:
|
|
|
|
|
case OFPACT_UNROLL_XLATE:
|
|
|
|
|
case OFPACT_CT:
|
|
|
|
|
case OFPACT_NAT:
|
|
|
|
|
case OFPACT_DEBUG_RECIRC:
|
|
|
|
|
case OFPACT_METER:
|
|
|
|
|
case OFPACT_CLEAR_ACTIONS:
|
|
|
|
|
case OFPACT_WRITE_ACTIONS:
|
|
|
|
|
case OFPACT_WRITE_METADATA:
|
|
|
|
|
case OFPACT_GOTO_TABLE:
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Recirculate */
|
|
|
|
|
ctx_trigger_freeze(ctx);
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
static void
|
|
|
|
|
do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
|
|
|
|
|
struct xlate_ctx *ctx)
|
|
|
|
|
{
|
2015-07-31 13:15:52 -07:00
|
|
|
|
struct flow_wildcards *wc = ctx->wc;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &ctx->xin->flow;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
const struct ofpact *a;
|
|
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
|
if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
|
2015-11-30 16:24:49 -02:00
|
|
|
|
tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
}
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcards masks, when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
/* dl_type already in the mask, not set below. */
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
|
|
|
|
|
struct ofpact_controller *controller;
|
|
|
|
|
const struct ofpact_metadata *metadata;
|
2013-10-24 13:19:29 -07:00
|
|
|
|
const struct ofpact_set_field *set_field;
|
|
|
|
|
const struct mf_field *mf;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-11-25 15:19:37 -08:00
|
|
|
|
if (ctx->error) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
recirc_for_mpls(a, ctx);
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
if (ctx->exit) {
|
|
|
|
|
/* Check if need to store the remaining actions for later
|
|
|
|
|
* execution. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (ctx->freezing) {
|
|
|
|
|
freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len),
|
2015-03-26 11:18:16 -07:00
|
|
|
|
ctx);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-06-24 08:46:31 +09:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
switch (a->type) {
|
|
|
|
|
case OFPACT_OUTPUT:
|
|
|
|
|
xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
|
|
|
|
|
ofpact_get_OUTPUT(a)->max_len, true);
|
|
|
|
|
break;
|
|
|
|
|
|
2013-09-01 18:30:17 -07:00
|
|
|
|
case OFPACT_GROUP:
|
2013-10-30 18:17:14 +09:00
|
|
|
|
if (xlate_group_action(ctx, ofpact_get_GROUP(a)->group_id)) {
|
2015-03-26 11:18:17 -07:00
|
|
|
|
/* Group could not be found. */
|
2013-10-30 18:17:14 +09:00
|
|
|
|
return;
|
|
|
|
|
}
|
2013-09-01 18:30:17 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_CONTROLLER:
|
|
|
|
|
controller = ofpact_get_CONTROLLER(a);
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
if (controller->pause) {
|
|
|
|
|
ctx->pause = controller;
|
|
|
|
|
ctx->xout->slow |= SLOW_CONTROLLER;
|
|
|
|
|
ctx_trigger_freeze(ctx);
|
|
|
|
|
a = ofpact_next(a);
|
|
|
|
|
} else {
|
|
|
|
|
execute_controller_action(ctx, controller->max_len,
|
|
|
|
|
controller->reason,
|
|
|
|
|
controller->controller_id,
|
|
|
|
|
controller->userdata,
|
|
|
|
|
controller->userdata_len);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_ENQUEUE:
|
2014-09-17 15:01:48 -07:00
|
|
|
|
memset(&wc->masks.skb_priority, 0xff,
|
|
|
|
|
sizeof wc->masks.skb_priority);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_VLAN_VID:
|
2013-08-02 21:17:31 -07:00
|
|
|
|
wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
|
2013-10-24 13:19:25 -07:00
|
|
|
|
if (flow->vlan_tci & htons(VLAN_CFI) ||
|
|
|
|
|
ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
|
|
|
|
|
flow->vlan_tci &= ~htons(VLAN_VID_MASK);
|
|
|
|
|
flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
|
|
|
|
|
| htons(VLAN_CFI));
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_VLAN_PCP:
|
2013-08-02 21:17:31 -07:00
|
|
|
|
wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
|
2013-10-24 13:19:25 -07:00
|
|
|
|
if (flow->vlan_tci & htons(VLAN_CFI) ||
|
|
|
|
|
ofpact_get_SET_VLAN_PCP(a)->push_vlan_if_needed) {
|
|
|
|
|
flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
|
|
|
|
|
flow->vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
|
|
|
|
|
<< VLAN_PCP_SHIFT) | VLAN_CFI);
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_STRIP_VLAN:
|
2013-08-02 21:17:31 -07:00
|
|
|
|
memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->vlan_tci = htons(0);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_PUSH_VLAN:
|
|
|
|
|
/* XXX 802.1AD(QinQ) */
|
2013-08-02 21:17:31 -07:00
|
|
|
|
memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->vlan_tci = htons(VLAN_CFI);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_ETH_SRC:
|
2015-08-28 14:55:11 -07:00
|
|
|
|
WC_MASK_FIELD(wc, dl_src);
|
|
|
|
|
flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_ETH_DST:
|
2015-08-28 14:55:11 -07:00
|
|
|
|
WC_MASK_FIELD(wc, dl_dst);
|
|
|
|
|
flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_IPV4_SRC:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_IP)) {
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcards masks, when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_IPV4_DST:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
if (flow->dl_type == htons(ETH_TYPE_IP)) {
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcards masks, when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2013-10-23 09:58:34 -07:00
|
|
|
|
case OFPACT_SET_IP_DSCP:
|
|
|
|
|
if (is_ip_any(flow)) {
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcards masks, when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
wc->masks.nw_tos |= IP_DSCP_MASK;
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->nw_tos &= ~IP_DSCP_MASK;
|
2013-10-23 09:58:34 -07:00
|
|
|
|
flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2013-10-22 17:20:43 -07:00
|
|
|
|
case OFPACT_SET_IP_ECN:
|
|
|
|
|
if (is_ip_any(flow)) {
|
|
|
|
|
wc->masks.nw_tos |= IP_ECN_MASK;
|
|
|
|
|
flow->nw_tos &= ~IP_ECN_MASK;
|
|
|
|
|
flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2013-10-22 17:20:44 -07:00
|
|
|
|
case OFPACT_SET_IP_TTL:
|
|
|
|
|
if (is_ip_any(flow)) {
|
|
|
|
|
wc->masks.nw_ttl = 0xff;
|
|
|
|
|
flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_SET_L4_SRC_PORT:
|
Fix setting transport ports with frags.
Packets with 'LATER' fragment do not have a transport header, so it is
not possible to either match on or set transport ports on such
packets. Matching is prevented by augmenting mf_are_prereqs_ok() with
a nw_frag 'LATER' bit check. Setting the transport headers on such
packets is prevented in three ways:
1. Flows with an explicit match on nw_frag, where the LATER bit is 1:
existing calls to the modified mf_are_prereqs_ok() prohibit using
transport header fields (port numbers) in OXM/NXM actions
(set_field, move). SET_TP_* actions need a new check on the LATER
bit.
2. Flows that wildcard the nw_frag LATER bit: At flow translation
time, add calls to mf_are_prereqs_ok() to make sure that we do not
use transport ports in flows that do not have them.
3. At action execution time, do not set transport ports, if the packet
does not have a full transport header. This ensures that we never
call the packet_set functions, that require a valid transport
header, with packets that do not have them. For example, if the
flow was created with an IPv6 first fragment that had the full TCP
header, but the next packet's first fragment is missing them.
3 alone would suffice for correct behavior, but 1 and 2 seem like a
right thing to do, anyway.
Currently, if we are setting port numbers, we will also match them,
due to us tracking the set fields with the same flow_wildcards as the
matched fields. Hence, if the incoming port number was not zero, the
flow would not match any packets with missing or truncated transport
headers. However, relying on no packets having zero port numbers
would not be very robust. Also, we may separate the tracking of set
and matched fields in the future, which would allow some flows that
blindly set port numbers to not match on them at all.
For TCP in case 3 we use ofpbuf_get_tcp_payload() that requires the
whole (potentially variable size) TCP header to be present. However,
when parsing a flow, we only require the fixed size portion of the TCP
header to be present, which would be enough to set the port numbers
and fix the TCP checksum.
Finally, we add tests testing the new behavior.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-11-05 10:10:13 -08:00
|
|
|
|
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcards masks, when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
|
|
|
|
|
memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_L4_DST_PORT:
|
Fix setting transport ports with frags.
Packets with 'LATER' fragment do not have a transport header, so it is
not possible to either match on or set transport ports on such
packets. Matching is prevented by augmenting mf_are_prereqs_ok() with
a nw_frag 'LATER' bit check. Setting the transport headers on such
packets is prevented in three ways:
1. Flows with an explicit match on nw_frag, where the LATER bit is 1:
existing calls to the modified mf_are_prereqs_ok() prohibit using
transport header fields (port numbers) in OXM/NXM actions
(set_field, move). SET_TP_* actions need a new check on the LATER
bit.
2. Flows that wildcard the nw_frag LATER bit: At flow translation
time, add calls to mf_are_prereqs_ok() to make sure that we do not
use transport ports in flows that do not have them.
3. At action execution time, do not set transport ports, if the packet
does not have a full transport header. This ensures that we never
call the packet_set functions, that require a valid transport
header, with packets that do not have them. For example, if the
flow was created with a IPv6 first fragment that had the full TCP
header, but the next packet's first fragment is missing them.
3 alone would suffice for correct behavior, but 1 and 2 seem like a
right thing to do, anyway.
Currently, if we are setting port numbers, we will also match them,
due to us tracking the set fields with the same flow_wildcards as the
matched fields. Hence, if the incoming port number was not zero, the
flow would not match any packets with missing or truncated transport
headers. However, relying on no packets having zero port numbers
would not be very robust. Also, we may separate the tracking of set
and matched fields in the future, which would allow some flows that
blindly set port numbers to not match on them at all.
For TCP in case 3 we use ofpbuf_get_tcp_payload() that requires the
whole (potentially variable size) TCP header to be present. However,
when parsing a flow, we only require the fixed size portion of the TCP
header to be present, which would be enough to set the port numbers
and fix the TCP checksum.
Finally, we add tests testing the new behavior.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-11-05 10:10:13 -08:00
|
|
|
|
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
|
Set datapath mask bits when setting a flow field.
Since at the datapath interface we do not have set actions for
individual fields, but larger sets of fields for a given protocol
layer, the set action will in practice only ever apply to exactly
matched flows for the given protocol layer. For example, if the
reg_load changes the IP TTL, the corresponding datapath action will
rewrite also the IP addresses and TOS byte. Since these other field
values may not be explicitly set, they depend on the incoming flow field
values, and hence all of them are set in the wildcard masks when
the action is committed to the datapath. For the rare case, where the
reg_load action does not actually change the value, and no other flow
field values are set (or loaded), the datapath action is skipped, and
no mask bits are set. Such a datapath flow should, however, be
dependent on the specific field value, so the corresponding wildcard
mask bits must be set, lest the datapath flow be applied to packets
containing some other value in the field and the field value remain
unchanged regardless of the incoming value.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
2013-10-15 12:40:38 -07:00
|
|
|
|
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
|
|
|
|
|
memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_RESUBMIT:
|
2016-02-24 16:10:42 -08:00
|
|
|
|
/* Freezing complicates resubmit. Some action in the flow
|
|
|
|
|
* entry found by resubmit might trigger freezing. If that
|
|
|
|
|
* happens, then we do not want to execute the resubmit again
|
|
|
|
|
* during thawing, so we want to skip back to the head of the loop
|
|
|
|
|
* to avoid that, only adding any actions that follow the resubmit
|
|
|
|
|
* to the frozen actions.
|
ofproto-dpif-xlate: Do not execute resubmit again after recirculation.
Consider the following flow table:
table=0 actions=resubmit(,1),2
table=1 actions=debug_recirc
When debug_recirc triggers recirculation and we later resume processing,
only the output to port 2 should be executed, because the effects of
"resubmit" have already taken place. However, until now, the "resubmit"
was added to the actions to execute post-recirculation, resulting in an
infinite loop.
Now consider this flow table (as seen in the "MPLS handling" test in
ofproto-dpif.at):
table=0 actions=pop_mpls(0x0806),resubmit(,1)
table=1 ip,nw_dst=1.2.3.4 actions=controller
Here, we do want to add the "resubmit" to the actions to execute
post-recirculation, since the "resubmit" cannot be processed until after
recirculation makes the nw_dst field available.
This commit fixes the problem in both cases.
Found when testing a feature based on recirculation.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-27 09:14:18 -08:00
|
|
|
|
*/
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
|
ofproto-dpif-xlate: Do not execute resubmit again after recirculation.
Consider the following flow table:
table=0 actions=resubmit(,1),2
table=1 actions=debug_recirc
When debug_recirc triggers recirculation and we later resume processing,
only the output to port 2 should be executed, because the effects of
"resubmit" have already taken place. However, until now, the "resubmit"
was added to the actions to execute post-recirculation, resulting in an
infinite loop.
Now consider this flow table (as seen in the "MPLS handling" test in
ofproto-dpif.at):
table=0 actions=pop_mpls(0x0806),resubmit(,1)
table=1 ip,nw_dst=1.2.3.4 actions=controller
Here, we do want to add the "resubmit" to the actions to execute
post-recirculation, since the "resubmit" cannot be processed until after
recirculation makes the nw_dst field available.
This commit fixes the problem in both cases.
Found when testing a feature based on recirculation.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-01-27 09:14:18 -08:00
|
|
|
|
continue;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
case OFPACT_SET_TUNNEL:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_SET_QUEUE:
|
2014-09-17 15:01:48 -07:00
|
|
|
|
memset(&wc->masks.skb_priority, 0xff,
|
|
|
|
|
sizeof wc->masks.skb_priority);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_POP_QUEUE:
|
2014-09-17 15:01:48 -07:00
|
|
|
|
memset(&wc->masks.skb_priority, 0xff,
|
|
|
|
|
sizeof wc->masks.skb_priority);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->skb_priority = ctx->orig_skb_priority;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_REG_MOVE:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-10-24 13:19:29 -07:00
|
|
|
|
case OFPACT_SET_FIELD:
|
|
|
|
|
set_field = ofpact_get_SET_FIELD(a);
|
|
|
|
|
mf = set_field->field;
|
|
|
|
|
|
|
|
|
|
/* Set field action only ever overwrites packet's outermost
|
|
|
|
|
* applicable header fields. Do nothing if no header exists. */
|
2014-04-09 11:13:57 -07:00
|
|
|
|
if (mf->id == MFF_VLAN_VID) {
|
|
|
|
|
wc->masks.vlan_tci |= htons(VLAN_CFI);
|
|
|
|
|
if (!(flow->vlan_tci & htons(VLAN_CFI))) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} else if ((mf->id == MFF_MPLS_LABEL || mf->id == MFF_MPLS_TC)
|
|
|
|
|
/* 'dl_type' is already unwildcarded. */
|
|
|
|
|
&& !eth_type_mpls(flow->dl_type)) {
|
|
|
|
|
break;
|
2013-10-24 13:19:29 -07:00
|
|
|
|
}
|
2015-10-20 00:58:05 -07:00
|
|
|
|
/* A flow may wildcard nw_frag. Do nothing if setting a transport
|
Fix setting transport ports with frags.
Packets with 'LATER' fragment do not have a transport header, so it is
not possible to either match on or set transport ports on such
packets. Matching is prevented by augmenting mf_are_prereqs_ok() with
a nw_frag 'LATER' bit check. Setting the transport headers on such
packets is prevented in three ways:
1. Flows with an explicit match on nw_frag, where the LATER bit is 1:
existing calls to the modified mf_are_prereqs_ok() prohibit using
transport header fields (port numbers) in OXM/NXM actions
(set_field, move). SET_TP_* actions need a new check on the LATER
bit.
2. Flows that wildcard the nw_frag LATER bit: At flow translation
time, add calls to mf_are_prereqs_ok() to make sure that we do not
use transport ports in flows that do not have them.
3. At action execution time, do not set transport ports, if the packet
does not have a full transport header. This ensures that we never
call the packet_set functions, that require a valid transport
header, with packets that do not have them. For example, if the
flow was created with a IPv6 first fragment that had the full TCP
header, but the next packet's first fragment is missing them.
3 alone would suffice for correct behavior, but 1 and 2 seem like a
right thing to do, anyway.
Currently, if we are setting port numbers, we will also match them,
due to us tracking the set fields with the same flow_wildcards as the
matched fields. Hence, if the incoming port number was not zero, the
flow would not match any packets with missing or truncated transport
headers. However, relying on no packets having zero port numbers
would not be very robust. Also, we may separate the tracking of set
and matched fields in the future, which would allow some flows that
blindly set port numbers to not match on them at all.
For TCP in case 3 we use ofpbuf_get_tcp_payload() that requires the
whole (potentially variable size) TCP header to be present. However,
when parsing a flow, we only require the fixed size portion of the TCP
header to be present, which would be enough to set the port numbers
and fix the TCP checksum.
Finally, we add tests testing the new behavior.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-11-05 10:10:13 -08:00
|
|
|
|
* header field on a packet that does not have them. */
|
ofproto-dpif-xlate: Generate bitmasks in set_field.
Previously, whenever a set_field() action was executed, the entire field
would become masked and the entire field replaced, regardless of the
mask specified in the set_field() action.
In most cases this is fine, although it may lead to more specific
wildcards than strictly necessary. However, in a particular case with
connection tracking actions it could lead to the wrong behaviour.
Unlike most OpenFlow fields, the ct_{mark,labels} fields are typically
unknown until the ct(...,recirc_table=N,...) action is executed however
the packet may actually belong to a connection which has a nonzero value
for one of these fields. This can lead to the wrong behaviour with flows
such as the following:
in_port=1,ip,actions=ct(commit,exec(set_field(0x1/0x1->ct_mark))),2
in_port=2,ip,actions=ct(commit,exec(set_field(0x2/0x2->ct_mark))),1
Connections flowing through these actions will always update the ct_mark
field stored within the conntrack table. However, rather than modifying
only the specified bits (0x1 in one direction, 0x2 in the other), the
entire ct_mark field will be replaced. Such connections will constantly
toggle the value of ct_mark between 0x1 and 0x2, rather than becoming
0x3 and keeping that value.
This commit fixes the issue by ensuring that set_field actions only
modify the modified bits in the wildcards, rather than masking the
entire field.
Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
Fixes: 9daf23484fb1 ("Add connection tracking label support.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-04 14:56:03 -07:00
|
|
|
|
mf_mask_field_and_prereqs__(mf, &set_field->mask, wc);
|
Fix setting transport ports with frags.
Packets with 'LATER' fragment do not have a transport header, so it is
not possible to either match on or set transport ports on such
packets. Matching is prevented by augmenting mf_are_prereqs_ok() with
a nw_frag 'LATER' bit check. Setting the transport headers on such
packets is prevented in three ways:
1. Flows with an explicit match on nw_frag, where the LATER bit is 1:
existing calls to the modified mf_are_prereqs_ok() prohibit using
transport header fields (port numbers) in OXM/NXM actions
(set_field, move). SET_TP_* actions need a new check on the LATER
bit.
2. Flows that wildcard the nw_frag LATER bit: At flow translation
time, add calls to mf_are_prereqs_ok() to make sure that we do not
use transport ports in flows that do not have them.
3. At action execution time, do not set transport ports, if the packet
does not have a full transport header. This ensures that we never
call the packet_set functions, that require a valid transport
header, with packets that do not have them. For example, if the
flow was created with a IPv6 first fragment that had the full TCP
header, but the next packet's first fragment is missing them.
3 alone would suffice for correct behavior, but 1 and 2 seem like a
right thing to do, anyway.
Currently, if we are setting port numbers, we will also match them,
due to us tracking the set fields with the same flow_wildcards as the
matched fields. Hence, if the incoming port number was not zero, the
flow would not match any packets with missing or truncated transport
headers. However, relying on no packets having zero port numbers
would not be very robust. Also, we may separate the tracking of set
and matched fields in the future, which would allow some flows that
blindly set port numbers to not match on them at all.
For TCP in case 3 we use ofpbuf_get_tcp_payload() that requires the
whole (potentially variable size) TCP header to be present. However,
when parsing a flow, we only require the fixed size portion of the TCP
header to be present, which would be enough to set the port numbers
and fix the TCP checksum.
Finally, we add tests testing the new behavior.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-11-05 10:10:13 -08:00
|
|
|
|
if (mf_are_prereqs_ok(mf, flow)) {
|
|
|
|
|
mf_set_flow_value_masked(mf, &set_field->value,
|
|
|
|
|
&set_field->mask, flow);
|
|
|
|
|
}
|
2013-10-24 13:19:29 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_STACK_PUSH:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
|
|
|
|
|
&ctx->stack);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_STACK_POP:
|
2013-08-02 21:17:31 -07:00
|
|
|
|
nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
|
|
|
|
|
&ctx->stack);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_PUSH_MPLS:
|
2014-02-04 10:32:35 -08:00
|
|
|
|
compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_POP_MPLS:
|
2014-02-04 10:32:35 -08:00
|
|
|
|
compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-10-24 13:19:34 -07:00
|
|
|
|
case OFPACT_SET_MPLS_LABEL:
|
2014-02-04 10:32:35 -08:00
|
|
|
|
compose_set_mpls_label_action(
|
|
|
|
|
ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
|
2015-03-26 11:18:17 -07:00
|
|
|
|
break;
|
2013-10-24 13:19:34 -07:00
|
|
|
|
|
|
|
|
|
case OFPACT_SET_MPLS_TC:
|
2014-02-04 10:32:35 -08:00
|
|
|
|
compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
|
2013-10-24 13:19:34 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_SET_MPLS_TTL:
|
2014-02-04 10:32:35 -08:00
|
|
|
|
compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_DEC_MPLS_TTL:
|
2013-06-12 14:33:17 -07:00
|
|
|
|
if (compose_dec_mpls_ttl_action(ctx)) {
|
2013-07-17 16:14:02 -07:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_DEC_TTL:
|
2013-08-02 21:17:31 -07:00
|
|
|
|
wc->masks.nw_ttl = 0xff;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
|
2013-07-17 16:14:02 -07:00
|
|
|
|
return;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_NOTE:
|
|
|
|
|
/* Nothing to do. */
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_MULTIPATH:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_BUNDLE:
|
|
|
|
|
xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_OUTPUT_REG:
|
|
|
|
|
xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_LEARN:
|
|
|
|
|
xlate_learn_action(ctx, ofpact_get_LEARN(a));
|
|
|
|
|
break;
|
|
|
|
|
|
2015-01-13 16:47:34 -08:00
|
|
|
|
case OFPACT_CONJUNCTION: {
|
|
|
|
|
/* A flow with a "conjunction" action represents part of a special
|
|
|
|
|
* kind of "set membership match". Such a flow should not actually
|
|
|
|
|
* get executed, but it could via, say, a "packet-out", even though
|
|
|
|
|
* that wouldn't be useful. Log it to help debugging. */
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
VLOG_INFO_RL(&rl, "executing no-op conjunction action");
|
2015-01-11 13:25:24 -08:00
|
|
|
|
break;
|
2015-01-13 16:47:34 -08:00
|
|
|
|
}
|
2015-01-11 13:25:24 -08:00
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_EXIT:
|
|
|
|
|
ctx->exit = true;
|
|
|
|
|
break;
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
case OFPACT_UNROLL_XLATE: {
|
|
|
|
|
struct ofpact_unroll_xlate *unroll = ofpact_get_UNROLL_XLATE(a);
|
|
|
|
|
|
|
|
|
|
/* Restore translation context data that was stored earlier. */
|
|
|
|
|
ctx->table_id = unroll->rule_table_id;
|
|
|
|
|
ctx->rule_cookie = unroll->rule_cookie;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_FIN_TIMEOUT:
|
2013-06-12 14:37:18 -07:00
|
|
|
|
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_CLEAR_ACTIONS:
|
2013-10-11 13:23:29 +09:00
|
|
|
|
ofpbuf_clear(&ctx->action_set);
|
2014-11-03 14:24:01 -08:00
|
|
|
|
ctx->xin->flow.actset_output = OFPP_UNSET;
|
|
|
|
|
ctx->action_set_has_group = false;
|
2013-10-11 13:23:29 +09:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_WRITE_ACTIONS:
|
2016-01-15 13:56:34 -08:00
|
|
|
|
xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a));
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OFPACT_WRITE_METADATA:
|
|
|
|
|
metadata = ofpact_get_WRITE_METADATA(a);
|
2013-06-12 14:37:18 -07:00
|
|
|
|
flow->metadata &= ~metadata->mask;
|
|
|
|
|
flow->metadata |= metadata->metadata & metadata->mask;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
|
2013-06-20 17:26:18 +03:00
|
|
|
|
case OFPACT_METER:
|
|
|
|
|
/* Not implemented yet. */
|
|
|
|
|
break;
|
|
|
|
|
|
2013-06-11 13:32:30 -07:00
|
|
|
|
case OFPACT_GOTO_TABLE: {
|
|
|
|
|
struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
|
|
|
|
|
|
2016-01-29 17:28:08 -08:00
|
|
|
|
ovs_assert(ctx->table_id < ogt->table_id);
|
|
|
|
|
|
2013-07-27 12:24:15 -07:00
|
|
|
|
xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
|
2014-03-20 13:42:22 -07:00
|
|
|
|
ogt->table_id, true, true);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OFPACT_SAMPLE:
|
|
|
|
|
xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
|
|
|
|
|
break;
|
2015-07-29 22:02:41 -07:00
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
case OFPACT_CT:
|
|
|
|
|
compose_conntrack_action(ctx, ofpact_get_CT(a));
|
|
|
|
|
break;
|
|
|
|
|
|
2015-11-24 15:47:56 -08:00
|
|
|
|
case OFPACT_NAT:
|
|
|
|
|
/* This will be processed by compose_conntrack_action(). */
|
|
|
|
|
ctx->ct_nat_action = ofpact_get_NAT(a);
|
|
|
|
|
break;
|
|
|
|
|
|
2015-07-29 22:02:41 -07:00
|
|
|
|
case OFPACT_DEBUG_RECIRC:
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx_trigger_freeze(ctx);
|
2015-07-29 22:02:41 -07:00
|
|
|
|
a = ofpact_next(a);
|
|
|
|
|
break;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2015-03-26 11:18:17 -07:00
|
|
|
|
|
|
|
|
|
/* Check if need to store this and the remaining actions for later
|
|
|
|
|
* execution. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) {
|
|
|
|
|
freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx);
|
2015-03-26 11:18:17 -07:00
|
|
|
|
break;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
|
2014-08-06 18:49:44 -07:00
|
|
|
|
const struct flow *flow, ofp_port_t in_port,
|
|
|
|
|
struct rule_dpif *rule, uint16_t tcp_flags,
|
2015-07-31 13:34:16 -07:00
|
|
|
|
const struct dp_packet *packet, struct flow_wildcards *wc,
|
|
|
|
|
struct ofpbuf *odp_actions)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
|
|
|
|
xin->ofproto = ofproto;
|
|
|
|
|
xin->flow = *flow;
|
2014-08-06 18:49:44 -07:00
|
|
|
|
xin->flow.in_port.ofp_port = in_port;
|
2014-11-03 14:24:01 -08:00
|
|
|
|
xin->flow.actset_output = OFPP_UNSET;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xin->packet = packet;
|
|
|
|
|
xin->may_learn = packet != NULL;
|
|
|
|
|
xin->rule = rule;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
xin->xcache = NULL;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
xin->ofpacts = NULL;
|
|
|
|
|
xin->ofpacts_len = 0;
|
|
|
|
|
xin->tcp_flags = tcp_flags;
|
|
|
|
|
xin->resubmit_hook = NULL;
|
|
|
|
|
xin->report_hook = NULL;
|
|
|
|
|
xin->resubmit_stats = NULL;
|
2016-04-21 10:50:16 -07:00
|
|
|
|
xin->indentation = 0;
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
xin->depth = 0;
|
2015-09-15 19:37:06 -07:00
|
|
|
|
xin->resubmits = 0;
|
2015-07-31 13:15:52 -07:00
|
|
|
|
xin->wc = wc;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
xin->odp_actions = odp_actions;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
|
|
|
|
/* Do recirc lookup. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
xin->frozen_state = NULL;
|
2016-01-20 16:53:01 -08:00
|
|
|
|
if (flow->recirc_id) {
|
|
|
|
|
const struct recirc_id_node *node
|
|
|
|
|
= recirc_id_node_find(flow->recirc_id);
|
|
|
|
|
if (node) {
|
2016-02-16 10:51:58 -08:00
|
|
|
|
xin->frozen_state = &node->state;
|
2016-01-20 16:53:01 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
xlate_out_uninit(struct xlate_out *xout)
|
|
|
|
|
{
|
2015-03-26 11:18:16 -07:00
|
|
|
|
if (xout) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
recirc_refs_unref(&xout->recircs);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
|
|
|
|
|
* into datapath actions, using 'ctx', and discards the datapath actions. */
|
|
|
|
|
void
|
|
|
|
|
xlate_actions_for_side_effects(struct xlate_in *xin)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_out xout;
|
2015-11-25 15:19:37 -08:00
|
|
|
|
enum xlate_error error;
|
|
|
|
|
|
|
|
|
|
error = xlate_actions(xin, &xout);
|
|
|
|
|
if (error) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
|
|
|
|
|
VLOG_WARN_RL(&rl, "xlate_actions failed (%s)!", xlate_strerror(error));
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
xlate_out_uninit(&xout);
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 10:25:06 -07:00
|
|
|
|
static struct skb_priority_to_dscp *
|
|
|
|
|
get_skb_priority(const struct xport *xport, uint32_t skb_priority)
|
|
|
|
|
{
|
|
|
|
|
struct skb_priority_to_dscp *pdscp;
|
|
|
|
|
uint32_t hash;
|
|
|
|
|
|
|
|
|
|
hash = hash_int(skb_priority, 0);
|
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
|
|
|
|
|
if (pdscp->skb_priority == skb_priority) {
|
|
|
|
|
return pdscp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
|
|
|
|
|
uint8_t *dscp)
|
|
|
|
|
{
|
|
|
|
|
struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
|
|
|
|
|
*dscp = pdscp ? pdscp->dscp : 0;
|
|
|
|
|
return pdscp != NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2014-09-17 15:01:48 -07:00
|
|
|
|
static size_t
|
|
|
|
|
count_skb_priorities(const struct xport *xport)
|
|
|
|
|
{
|
|
|
|
|
return hmap_count(&xport->skb_priorities);
|
|
|
|
|
}
|
|
|
|
|
|
2013-07-06 10:25:06 -07:00
|
|
|
|
static void
|
|
|
|
|
clear_skb_priorities(struct xport *xport)
|
|
|
|
|
{
|
2016-04-06 18:53:59 -07:00
|
|
|
|
struct skb_priority_to_dscp *pdscp;
|
2013-07-06 10:25:06 -07:00
|
|
|
|
|
2016-04-06 18:53:59 -07:00
|
|
|
|
HMAP_FOR_EACH_POP (pdscp, hmap_node, &xport->skb_priorities) {
|
2013-07-06 10:25:06 -07:00
|
|
|
|
free(pdscp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-26 14:44:39 -07:00
|
|
|
|
static bool
|
|
|
|
|
actions_output_to_local_port(const struct xlate_ctx *ctx)
|
|
|
|
|
{
|
2013-06-13 18:38:24 -07:00
|
|
|
|
odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL);
|
2013-06-26 14:44:39 -07:00
|
|
|
|
const struct nlattr *a;
|
|
|
|
|
unsigned int left;
|
|
|
|
|
|
2015-07-31 13:34:16 -07:00
|
|
|
|
NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->odp_actions->data,
|
|
|
|
|
ctx->odp_actions->size) {
|
2013-06-26 14:44:39 -07:00
|
|
|
|
if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT
|
|
|
|
|
&& nl_attr_get_odp_port(a) == local_odp_port) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2014-09-16 12:45:42 +09:00
|
|
|
|
#if defined(__linux__)
|
2014-09-09 15:06:52 -07:00
|
|
|
|
/* Returns the maximum number of packets that the Linux kernel is willing to
|
|
|
|
|
* queue up internally to certain kinds of software-implemented ports, or the
|
|
|
|
|
* default (and rarely modified) value if it cannot be determined. */
|
|
|
|
|
static int
|
|
|
|
|
netdev_max_backlog(void)
|
|
|
|
|
{
|
|
|
|
|
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
|
|
|
|
|
static int max_backlog = 1000; /* The normal default value. */
|
|
|
|
|
|
|
|
|
|
if (ovsthread_once_start(&once)) {
|
|
|
|
|
static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
|
|
|
|
|
FILE *stream;
|
|
|
|
|
int n;
|
|
|
|
|
|
|
|
|
|
stream = fopen(filename, "r");
|
|
|
|
|
if (!stream) {
|
2015-09-08 16:31:30 -07:00
|
|
|
|
VLOG_INFO("%s: open failed (%s)", filename, ovs_strerror(errno));
|
2014-09-09 15:06:52 -07:00
|
|
|
|
} else {
|
|
|
|
|
if (fscanf(stream, "%d", &n) != 1) {
|
|
|
|
|
VLOG_WARN("%s: read error", filename);
|
|
|
|
|
} else if (n <= 100) {
|
|
|
|
|
VLOG_WARN("%s: unexpectedly small value %d", filename, n);
|
|
|
|
|
} else {
|
|
|
|
|
max_backlog = n;
|
|
|
|
|
}
|
|
|
|
|
fclose(stream);
|
|
|
|
|
}
|
|
|
|
|
ovsthread_once_done(&once);
|
|
|
|
|
|
|
|
|
|
VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return max_backlog;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
|
|
|
|
|
* 'odp_actions'. */
|
|
|
|
|
static int
|
|
|
|
|
count_output_actions(const struct ofpbuf *odp_actions)
|
|
|
|
|
{
|
|
|
|
|
const struct nlattr *a;
|
|
|
|
|
size_t left;
|
|
|
|
|
int n = 0;
|
|
|
|
|
|
2015-03-02 17:29:44 -08:00
|
|
|
|
NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
|
2014-09-09 15:06:52 -07:00
|
|
|
|
if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
|
|
|
|
|
n++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return n;
|
|
|
|
|
}
|
2014-09-16 12:45:42 +09:00
|
|
|
|
#endif /* defined(__linux__) */
|
2014-09-09 15:06:52 -07:00
|
|
|
|
|
|
|
|
|
/* Returns true if 'odp_actions' contains more output actions than the datapath
|
|
|
|
|
* can reliably handle in one go. On Linux, this is the value of the
|
|
|
|
|
* net.core.netdev_max_backlog sysctl, which limits the maximum number of
|
|
|
|
|
* packets that the kernel is willing to queue up for processing while the
|
|
|
|
|
* datapath is processing a set of actions. */
|
|
|
|
|
static bool
|
2014-09-16 12:45:42 +09:00
|
|
|
|
too_many_output_actions(const struct ofpbuf *odp_actions OVS_UNUSED)
|
2014-09-09 15:06:52 -07:00
|
|
|
|
{
|
|
|
|
|
#ifdef __linux__
|
2015-03-02 17:29:44 -08:00
|
|
|
|
return (odp_actions->size / NL_A_U32_SIZE > netdev_max_backlog()
|
2014-09-09 15:06:52 -07:00
|
|
|
|
&& count_output_actions(odp_actions) > netdev_max_backlog());
|
|
|
|
|
#else
|
|
|
|
|
/* OSes other than Linux might have similar limits, but we don't know how
|
|
|
|
|
* to determine them.*/
|
|
|
|
|
return false;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-23 14:43:26 -07:00
|
|
|
|
static void
|
|
|
|
|
xlate_wc_init(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
flow_wildcards_init_catchall(ctx->wc);
|
|
|
|
|
|
|
|
|
|
/* Some fields we consider to always be examined. */
|
2015-08-25 13:55:03 -07:00
|
|
|
|
WC_MASK_FIELD(ctx->wc, in_port);
|
|
|
|
|
WC_MASK_FIELD(ctx->wc, dl_type);
|
2015-07-23 14:43:26 -07:00
|
|
|
|
if (is_ip_any(&ctx->xin->flow)) {
|
2015-08-25 13:55:03 -07:00
|
|
|
|
WC_MASK_FIELD_MASK(ctx->wc, nw_frag, FLOW_NW_FRAG_MASK);
|
2015-07-23 14:43:26 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->xbridge->support.odp.recirc) {
|
|
|
|
|
/* Always exactly match recirc_id when datapath supports
|
|
|
|
|
* recirculation. */
|
2015-08-25 13:55:03 -07:00
|
|
|
|
WC_MASK_FIELD(ctx->wc, recirc_id);
|
2015-07-23 14:43:26 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->xbridge->netflow) {
|
|
|
|
|
netflow_mask_wc(&ctx->xin->flow, ctx->wc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tnl_wc_init(&ctx->xin->flow, ctx->wc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_wc_finish(struct xlate_ctx *ctx)
|
|
|
|
|
{
|
|
|
|
|
/* Clear the metadata and register wildcard masks, because we won't
|
|
|
|
|
* use non-header fields as part of the cache. */
|
|
|
|
|
flow_wildcards_clear_non_packet_fields(ctx->wc);
|
|
|
|
|
|
|
|
|
|
/* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields. struct flow
|
|
|
|
|
* uses the low 8 bits of the 16-bit tp_src and tp_dst members to
|
|
|
|
|
* represent these fields. The datapath interface, on the other hand,
|
|
|
|
|
* represents them with just 8 bits each. This means that if the high
|
|
|
|
|
* 8 bits of the masks for these fields somehow become set, then they
|
|
|
|
|
* will get chopped off by a round trip through the datapath, and
|
|
|
|
|
* revalidation will spot that as an inconsistency and delete the flow.
|
|
|
|
|
* Avoid the problem here by making sure that only the low 8 bits of
|
|
|
|
|
* either field can be unwildcarded for ICMP.
|
|
|
|
|
*/
|
2016-05-08 10:34:10 -07:00
|
|
|
|
if (is_icmpv4(&ctx->xin->flow, NULL) || is_icmpv6(&ctx->xin->flow, NULL)) {
|
2015-07-23 14:43:26 -07:00
|
|
|
|
ctx->wc->masks.tp_src &= htons(UINT8_MAX);
|
|
|
|
|
ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
|
|
|
|
|
}
|
|
|
|
|
/* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */
|
|
|
|
|
if (ctx->wc->masks.vlan_tci) {
|
|
|
|
|
ctx->wc->masks.vlan_tci |= htons(VLAN_CFI);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* Translates the flow, actions, or rule in 'xin' into datapath actions in
|
|
|
|
|
* 'xout'.
|
2013-08-20 11:16:14 -07:00
|
|
|
|
* The caller must take responsibility for eventually freeing 'xout', with
|
2015-11-25 15:19:37 -08:00
|
|
|
|
* xlate_out_uninit().
|
|
|
|
|
* Returns 'XLATE_OK' if translation was successful. In case of an error an
|
|
|
|
|
* empty set of actions will be returned in 'xin->odp_actions' (if non-NULL),
|
|
|
|
|
* so that most callers may ignore the return value and transparently install a
|
|
|
|
|
* drop flow when the translation fails. */
|
|
|
|
|
enum xlate_error
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
|
2013-06-11 13:32:30 -07:00
|
|
|
|
{
|
2015-07-29 22:31:07 -07:00
|
|
|
|
*xout = (struct xlate_out) {
|
|
|
|
|
.slow = 0,
|
2015-11-25 15:19:37 -08:00
|
|
|
|
.recircs = RECIRC_REFS_EMPTY_INITIALIZER,
|
2015-07-29 22:31:07 -07:00
|
|
|
|
};
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2015-07-23 16:24:29 -07:00
|
|
|
|
struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto);
|
|
|
|
|
if (!xbridge) {
|
2015-11-25 15:19:37 -08:00
|
|
|
|
return XLATE_BRIDGE_NOT_FOUND;
|
2015-07-23 16:24:29 -07:00
|
|
|
|
}
|
|
|
|
|
|
2013-06-12 14:37:18 -07:00
|
|
|
|
struct flow *flow = &xin->flow;
|
|
|
|
|
|
2015-07-23 16:24:29 -07:00
|
|
|
|
union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
|
|
|
|
|
uint64_t action_set_stub[1024 / 8];
|
2016-02-16 10:51:58 -08:00
|
|
|
|
uint64_t frozen_actions_stub[1024 / 8];
|
2015-07-31 13:34:16 -07:00
|
|
|
|
uint64_t actions_stub[256 / 8];
|
|
|
|
|
struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
|
2015-07-23 16:24:29 -07:00
|
|
|
|
struct xlate_ctx ctx = {
|
|
|
|
|
.xin = xin,
|
|
|
|
|
.xout = xout,
|
|
|
|
|
.base_flow = *flow,
|
2015-12-04 12:36:48 -02:00
|
|
|
|
.orig_tunnel_ipv6_dst = flow_tnl_dst(&flow->tunnel),
|
2015-07-23 16:24:29 -07:00
|
|
|
|
.xbridge = xbridge,
|
|
|
|
|
.stack = OFPBUF_STUB_INITIALIZER(stack_stub),
|
|
|
|
|
.rule = xin->rule,
|
2016-04-22 17:45:03 -07:00
|
|
|
|
.wc = (xin->wc
|
|
|
|
|
? xin->wc
|
2016-05-17 16:29:39 -07:00
|
|
|
|
: &(struct flow_wildcards) { .masks = { .dl_type = 0 } }),
|
2015-07-31 13:34:16 -07:00
|
|
|
|
.odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
|
2016-04-21 10:50:16 -07:00
|
|
|
|
.indentation = xin->indentation,
|
ofproto-dpif: Do not count resubmit to later tables against limit.
Open vSwitch must ensure that flow translation takes a finite amount of
time. Until now it has implemented this by limiting the depth of
recursion. The initial limit, in version 1.0.1, was no recursion at all,
and then over the years it has increased to 8 levels, then 16, then 32,
and 64 for the last few years. Now reports are coming in that 64 levels
are inadequate for some OVN setups. The natural inclination would be to
double the limit again to 128 levels.
This commit attempts another approach. Instead of increasing the limit,
it reduces the class of resubmits that count against the limit. Since the
goal for the depth limit is to prevent an infinite amount of work, it's
not necessary to count resubmits that can't lead to infinite work. In
particular, a resubmit from a table numbered x to a table y > x cannot do
this, because any OpenFlow switch has a finite number of tables. Because
in fact a resubmit (or goto_table) from one table to a later table is the
most common form of an OpenFlow pipeline, I suspect that this will greatly
alleviate the pressure to increase the depth limit.
Reported-by: Guru Shetty <guru@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
2016-04-21 10:50:17 -07:00
|
|
|
|
.depth = xin->depth,
|
2015-09-15 19:37:06 -07:00
|
|
|
|
.resubmits = xin->resubmits,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
.in_group = false,
|
|
|
|
|
.in_action_set = false,
|
|
|
|
|
|
|
|
|
|
.table_id = 0,
|
|
|
|
|
.rule_cookie = OVS_BE64_MAX,
|
|
|
|
|
.orig_skb_priority = flow->skb_priority,
|
|
|
|
|
.sflow_n_outputs = 0,
|
|
|
|
|
.sflow_odp_port = 0,
|
2015-07-24 09:35:58 -07:00
|
|
|
|
.nf_output_iface = NF_OUT_DROP,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
.exit = false,
|
2015-11-25 15:19:37 -08:00
|
|
|
|
.error = XLATE_OK,
|
2015-07-23 17:08:14 -07:00
|
|
|
|
.mirrors = 0,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
.freezing = false,
|
|
|
|
|
.frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
.pause = NULL,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
|
2016-05-25 10:34:31 +09:00
|
|
|
|
.was_mpls = false,
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
.conntracked = false,
|
2015-07-23 16:24:29 -07:00
|
|
|
|
|
2015-11-24 15:47:56 -08:00
|
|
|
|
.ct_nat_action = NULL,
|
|
|
|
|
|
2015-07-23 16:24:29 -07:00
|
|
|
|
.action_set_has_group = false,
|
|
|
|
|
.action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
|
|
|
|
|
};
|
2015-07-23 17:04:36 -07:00
|
|
|
|
|
|
|
|
|
/* 'base_flow' reflects the packet as it came in, but we need it to reflect
|
2016-04-25 11:27:58 -07:00
|
|
|
|
* the packet as the datapath will treat it for output actions. Our
|
|
|
|
|
* datapath doesn't retain tunneling information without us re-setting
|
|
|
|
|
* it, so clear the tunnel data.
|
2015-07-23 17:04:36 -07:00
|
|
|
|
*/
|
2016-04-25 11:27:58 -07:00
|
|
|
|
|
2015-07-23 16:24:29 -07:00
|
|
|
|
memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
|
2015-07-23 17:04:36 -07:00
|
|
|
|
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
|
2016-04-22 17:45:03 -07:00
|
|
|
|
xlate_wc_init(&ctx);
|
2015-07-23 16:24:29 -07:00
|
|
|
|
|
2013-06-13 18:38:24 -07:00
|
|
|
|
COVERAGE_INC(xlate_actions);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (xin->frozen_state) {
|
|
|
|
|
const struct frozen_state *state = xin->frozen_state;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
xlate_report(&ctx, "Thawing frozen state:");
|
2015-07-29 22:03:31 -07:00
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
if (xin->ofpacts_len > 0 || ctx.rule) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
2015-07-29 22:03:31 -07:00
|
|
|
|
const char *conflict = xin->ofpacts_len ? "actions" : "rule";
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
2015-07-29 22:03:31 -07:00
|
|
|
|
VLOG_WARN_RL(&rl, "Recirculation conflict (%s)!", conflict);
|
|
|
|
|
xlate_report(&ctx, "- Recirculation conflict (%s)!", conflict);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx.error = XLATE_RECIRCULATION_CONFLICT;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
goto exit;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set the bridge for post-recirculation processing if needed. */
|
2016-01-18 14:47:40 -08:00
|
|
|
|
if (!uuid_equals(ofproto_dpif_get_uuid(ctx.xbridge->ofproto),
|
|
|
|
|
&state->ofproto_uuid)) {
|
2015-03-26 11:18:16 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2015-07-29 20:32:12 -07:00
|
|
|
|
const struct xbridge *new_bridge
|
2016-01-18 14:47:40 -08:00
|
|
|
|
= xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
|
|
|
|
if (OVS_UNLIKELY(!new_bridge)) {
|
|
|
|
|
/* Drop the packet if the bridge cannot be found. */
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
2016-02-16 10:51:58 -08:00
|
|
|
|
VLOG_WARN_RL(&rl, "Frozen bridge no longer exists.");
|
|
|
|
|
xlate_report(&ctx, "- Frozen bridge no longer exists.");
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx.error = XLATE_BRIDGE_NOT_FOUND;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
goto exit;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
ctx.xbridge = new_bridge;
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Set the thawed table id. Note: A table lookup is done only if there
|
|
|
|
|
* are no frozen actions. */
|
2015-07-29 20:32:12 -07:00
|
|
|
|
ctx.table_id = state->table_id;
|
2015-07-29 22:03:31 -07:00
|
|
|
|
xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
if (!state->conntracked) {
|
|
|
|
|
clear_conntrack(flow);
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* Restore pipeline metadata. May change flow's in_port and other
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* metadata to the values that existed when freezing was triggered. */
|
|
|
|
|
frozen_metadata_to_flow(&state->metadata, flow);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
|
|
|
|
|
/* Restore stack, if any. */
|
2015-07-29 20:32:12 -07:00
|
|
|
|
if (state->stack) {
|
2016-01-20 16:47:14 -08:00
|
|
|
|
ofpbuf_put(&ctx.stack, state->stack,
|
|
|
|
|
state->n_stack * sizeof *state->stack);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-29 22:13:26 -07:00
|
|
|
|
/* Restore mirror state. */
|
|
|
|
|
ctx.mirrors = state->mirrors;
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* Restore action set, if any. */
|
2015-07-29 20:32:12 -07:00
|
|
|
|
if (state->action_set_len) {
|
2015-07-29 22:03:31 -07:00
|
|
|
|
xlate_report_actions(&ctx, "- Restoring action set",
|
2016-01-18 14:43:01 -08:00
|
|
|
|
state->action_set, state->action_set_len);
|
2015-07-29 22:03:31 -07:00
|
|
|
|
|
2016-01-15 13:56:34 -08:00
|
|
|
|
flow->actset_output = OFPP_UNSET;
|
|
|
|
|
xlate_write_actions__(&ctx, state->action_set,
|
|
|
|
|
state->action_set_len);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Restore frozen actions. If there are no actions, processing will
|
|
|
|
|
* start with a lookup in the table set above. */
|
2016-01-18 14:43:01 -08:00
|
|
|
|
xin->ofpacts = state->ofpacts;
|
|
|
|
|
xin->ofpacts_len = state->ofpacts_len;
|
|
|
|
|
if (state->ofpacts_len) {
|
2015-07-29 22:03:31 -07:00
|
|
|
|
xlate_report_actions(&ctx, "- Restoring actions",
|
|
|
|
|
xin->ofpacts, xin->ofpacts_len);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
|
|
|
|
} else if (OVS_UNLIKELY(flow->recirc_id)) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
|
|
|
|
|
|
|
|
|
|
VLOG_WARN_RL(&rl, "Recirculation context not found for ID %"PRIx32,
|
|
|
|
|
flow->recirc_id);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
ctx.error = XLATE_NO_RECIRCULATION_CONTEXT;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
goto exit;
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2015-06-11 15:53:43 -07:00
|
|
|
|
/* The bridge is now known so obtain its table version. */
|
|
|
|
|
ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2013-10-09 13:23:31 -07:00
|
|
|
|
if (!xin->ofpacts && !ctx.rule) {
|
2015-07-23 13:31:04 -07:00
|
|
|
|
ctx.rule = rule_dpif_lookup_from_table(
|
2016-04-22 17:45:03 -07:00
|
|
|
|
ctx.xbridge->ofproto, ctx.tables_version, flow, ctx.wc,
|
2015-08-02 11:51:32 -07:00
|
|
|
|
ctx.xin->resubmit_stats, &ctx.table_id,
|
2015-07-23 13:31:04 -07:00
|
|
|
|
flow->in_port.ofp_port, true, true);
|
2013-10-09 13:23:31 -07:00
|
|
|
|
if (ctx.xin->resubmit_stats) {
|
2015-07-23 13:31:04 -07:00
|
|
|
|
rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
|
2013-10-09 13:23:31 -07:00
|
|
|
|
}
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx.xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
|
2015-07-23 13:31:04 -07:00
|
|
|
|
entry->u.rule = ctx.rule;
|
2015-08-02 11:51:32 -07:00
|
|
|
|
rule_dpif_ref(ctx.rule);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
}
|
2014-11-04 11:17:11 -08:00
|
|
|
|
|
|
|
|
|
if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
|
2015-07-23 13:31:04 -07:00
|
|
|
|
ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
|
2014-11-04 11:17:11 -08:00
|
|
|
|
}
|
2013-10-09 13:23:31 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Get the proximate input port of the packet. (If xin->frozen_state,
|
2015-07-29 14:20:16 -07:00
|
|
|
|
* flow->in_port is the ultimate input port of the packet.) */
|
|
|
|
|
struct xport *in_port = get_ofp_port(xbridge,
|
|
|
|
|
ctx.base_flow.in_port.ofp_port);
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Tunnel stats only for not-thawed packets. */
|
|
|
|
|
if (!xin->frozen_state && in_port && in_port->is_tunnel) {
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (ctx.xin->resubmit_stats) {
|
|
|
|
|
netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
|
|
|
|
|
if (in_port->bfd) {
|
|
|
|
|
bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (ctx.xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
|
|
|
|
|
entry->u.dev.rx = netdev_ref(in_port->netdev);
|
|
|
|
|
entry->u.dev.bfd = bfd_ref(in_port->bfd);
|
2013-11-12 18:18:01 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!xin->frozen_state && process_special(&ctx, in_port)) {
|
2015-07-29 14:21:52 -07:00
|
|
|
|
/* process_special() did all the processing for this packet.
|
|
|
|
|
*
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* We do not perform special processing on thawed packets, since that
|
|
|
|
|
* was done before they were frozen and should not be redone. */
|
2015-07-29 14:21:52 -07:00
|
|
|
|
} else if (in_port && in_port->xbundle
|
|
|
|
|
&& xbundle_mirror_out(xbridge, in_port->xbundle)) {
|
|
|
|
|
if (ctx.xin->packet != NULL) {
|
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
|
|
|
|
|
VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
|
|
|
|
|
"%s, which is reserved exclusively for mirroring",
|
|
|
|
|
ctx.xbridge->name, in_port->xbundle->name);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Sampling is done on initial reception; don't redo after thawing. */
|
2015-07-29 15:24:05 -07:00
|
|
|
|
unsigned int user_cookie_offset = 0;
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!xin->frozen_state) {
|
2015-07-29 15:24:05 -07:00
|
|
|
|
user_cookie_offset = compose_sflow_action(&ctx);
|
|
|
|
|
compose_ipfix_action(&ctx, ODPP_NONE);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2015-07-23 16:40:38 -07:00
|
|
|
|
size_t sample_actions_len = ctx.odp_actions->size;
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
2015-07-23 14:43:26 -07:00
|
|
|
|
if (tnl_process_ecn(flow)
|
|
|
|
|
&& (!in_port || may_receive(in_port, &ctx))) {
|
2015-07-23 13:01:57 -07:00
|
|
|
|
const struct ofpact *ofpacts;
|
|
|
|
|
size_t ofpacts_len;
|
|
|
|
|
|
|
|
|
|
if (xin->ofpacts) {
|
|
|
|
|
ofpacts = xin->ofpacts;
|
|
|
|
|
ofpacts_len = xin->ofpacts_len;
|
|
|
|
|
} else if (ctx.rule) {
|
|
|
|
|
const struct rule_actions *actions
|
|
|
|
|
= rule_dpif_get_actions(ctx.rule);
|
|
|
|
|
ofpacts = actions->ofpacts;
|
|
|
|
|
ofpacts_len = actions->ofpacts_len;
|
|
|
|
|
ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
|
|
|
|
|
} else {
|
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
|
ofproto-dpif-xlate: Rewrite mirroring to better fit flow translation.
Until now, mirroring has been implemented by accumulating, across the whole
translation process, a set of mirrors that should receive a mirrored
packet. After translation was complete, mirroring restored the original
version of the packet and sent that version to the mirrors.
That implementation was ugly for multiple reasons. First, it means that
we have to keep a copy of the original packet (or its headers, actually),
which is expensive. Second, it doesn't really make sense to mirror a
version of a packet that is different from the one originally output.
Third, it interacted with recirculation; mirroring needed to happen only
after recirculation was complete, but this was never properly implemented,
so that (I think) mirroring never happened for packets that were
recirculated.
This commit changes how mirroring works. Now, a packet is mirrored at the
point in translation when it becomes eligible for it: for mirrors based on
ingress port, this is at ingress; for mirrors based on egress port, this
is at egress. (Duplicates are dropped.) Mirroring happens on the version
of the packet as it exists when it becomes eligible. Finally, since
mirroring happens immediately, it interacts better with recirculation
(it still isn't perfect, since duplicate mirroring will occur if a packet
is eligible for mirroring both before and after recirculation; this is
not difficult to fix and an upcoming commit later in this series will do so).
Finally, this commit removes more code from xlate_actions() than it adds,
which in my opinion makes it easier to understand.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
2015-07-29 17:00:49 -07:00
|
|
|
|
mirror_ingress_packet(&ctx);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
do_xlate_actions(ofpacts, ofpacts_len, &ctx);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
if (ctx.error) {
|
|
|
|
|
goto exit;
|
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
|
|
|
|
|
/* We've let OFPP_NORMAL and the learning action look at the
|
2016-02-16 10:51:58 -08:00
|
|
|
|
* packet, so cancel all actions and freezing if forwarding is
|
2016-01-28 17:11:19 -08:00
|
|
|
|
* disabled. */
|
2014-08-22 09:01:34 -07:00
|
|
|
|
if (in_port && (!xport_stp_forward_state(in_port) ||
|
|
|
|
|
!xport_rstp_forward_state(in_port))) {
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ctx.odp_actions->size = sample_actions_len;
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ctx_cancel_freeze(&ctx);
|
2016-01-28 17:11:19 -08:00
|
|
|
|
ofpbuf_clear(&ctx.action_set);
|
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!ctx.freezing) {
|
2016-01-28 17:11:19 -08:00
|
|
|
|
xlate_action_set(&ctx);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (ctx.freezing) {
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
finish_freezing(&ctx);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-26 11:18:16 -07:00
|
|
|
|
/* Output only fully processed packets. */
|
2016-02-16 10:51:58 -08:00
|
|
|
|
if (!ctx.freezing
|
2015-03-26 11:18:16 -07:00
|
|
|
|
&& xbridge->has_in_band
|
2013-06-26 14:44:39 -07:00
|
|
|
|
&& in_band_must_output_to_local_port(flow)
|
|
|
|
|
&& !actions_output_to_local_port(&ctx)) {
|
2015-03-12 13:02:07 -07:00
|
|
|
|
compose_output_action(&ctx, OFPP_LOCAL, NULL);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2013-07-07 03:52:16 -07:00
|
|
|
|
|
2015-07-29 15:24:05 -07:00
|
|
|
|
if (user_cookie_offset) {
|
|
|
|
|
fix_sflow_action(&ctx, user_cookie_offset);
|
2015-03-26 11:18:16 -07:00
|
|
|
|
}
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-31 13:34:16 -07:00
|
|
|
|
if (nl_attr_oversized(ctx.odp_actions->size)) {
|
2013-10-04 08:48:48 -07:00
|
|
|
|
/* These datapath actions are too big for a Netlink attribute, so we
|
2013-11-02 08:43:14 -07:00
|
|
|
|
* can't hand them to the kernel directly. dpif_execute() can execute
|
|
|
|
|
* them one by one with help, so just mark the result as SLOW_ACTION to
|
|
|
|
|
* prevent the flow from being installed. */
|
|
|
|
|
COVERAGE_INC(xlate_actions_oversize);
|
|
|
|
|
ctx.xout->slow |= SLOW_ACTION;
|
2015-07-31 13:34:16 -07:00
|
|
|
|
} else if (too_many_output_actions(ctx.odp_actions)) {
|
2014-09-09 15:06:52 -07:00
|
|
|
|
COVERAGE_INC(xlate_actions_too_many_output);
|
|
|
|
|
ctx.xout->slow |= SLOW_ACTION;
|
2013-10-04 08:48:48 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-02-16 10:51:58 -08:00
|
|
|
|
/* Do netflow only for packets on initial reception, that are not sent to
|
|
|
|
|
* the controller. We consider packets sent to the controller to be part
|
|
|
|
|
* of the control plane rather than the data plane. */
|
|
|
|
|
if (!xin->frozen_state
|
|
|
|
|
&& xbridge->netflow
|
|
|
|
|
&& !(xout->slow & SLOW_CONTROLLER)) {
|
2015-07-23 13:01:57 -07:00
|
|
|
|
if (ctx.xin->resubmit_stats) {
|
|
|
|
|
netflow_flow_update(xbridge->netflow, flow,
|
2015-07-24 09:35:58 -07:00
|
|
|
|
ctx.nf_output_iface,
|
2015-07-23 13:01:57 -07:00
|
|
|
|
ctx.xin->resubmit_stats);
|
|
|
|
|
}
|
|
|
|
|
if (ctx.xin->xcache) {
|
|
|
|
|
struct xc_entry *entry;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
2015-07-23 13:01:57 -07:00
|
|
|
|
entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETFLOW);
|
|
|
|
|
entry->u.nf.netflow = netflow_ref(xbridge->netflow);
|
|
|
|
|
entry->u.nf.flow = xmemdup(flow, sizeof *flow);
|
2015-07-24 09:35:58 -07:00
|
|
|
|
entry->u.nf.iface = ctx.nf_output_iface;
|
2013-11-12 18:18:01 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-22 17:45:03 -07:00
|
|
|
|
xlate_wc_finish(&ctx);
|
2015-07-31 13:34:16 -07:00
|
|
|
|
|
|
|
|
|
exit:
|
|
|
|
|
ofpbuf_uninit(&ctx.stack);
|
|
|
|
|
ofpbuf_uninit(&ctx.action_set);
|
2016-02-16 10:51:58 -08:00
|
|
|
|
ofpbuf_uninit(&ctx.frozen_actions);
|
2015-07-31 13:34:16 -07:00
|
|
|
|
ofpbuf_uninit(&scratch_actions);
|
2015-11-25 15:19:37 -08:00
|
|
|
|
|
|
|
|
|
/* Make sure we return a "drop flow" in case of an error. */
|
|
|
|
|
if (ctx.error) {
|
|
|
|
|
xout->slow = 0;
|
|
|
|
|
if (xin->odp_actions) {
|
|
|
|
|
ofpbuf_clear(xin->odp_actions);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ctx.error;
|
2013-10-09 04:30:33 +00:00
|
|
|
|
}
|
|
|
|
|
|
Implement serializing the state of packet traversal in "continuations".
One purpose of OpenFlow packet-in messages is to allow a controller to
interpose on the path of a packet through the flow tables. If, for
example, the controller needs to modify a packet in some way that the
switch doesn't directly support, the controller should be able to
program the switch to send it the packet, then modify the packet and
send it back to the switch to continue through the flow table.
That's the theory. In practice, this doesn't work with any but the
simplest flow tables. Packet-in messages simply don't include enough
context to allow the flow table traversal to continue. For example:
* Via "resubmit" actions, an Open vSwitch packet can have an
effective "call stack", but a packet-in can't describe it, and
so it would be lost.
* A packet-in can't preserve the stack used by NXAST_PUSH and
NXAST_POP actions.
* A packet-in can't preserve the OpenFlow 1.1+ action set.
* A packet-in can't preserve the state of Open vSwitch mirroring
or connection tracking.
This commit introduces a solution called "continuations". A continuation
is the state of a packet's traversal through OpenFlow flow tables. A
"controller" action with the "pause" flag, which is newly implemented in
this commit, generates a continuation and sends it to the OpenFlow
controller in a packet-in asynchronous message (only NXT_PACKET_IN2
supports continuations, so the controller must configure them with
NXT_SET_PACKET_IN_FORMAT). The controller processes the packet-in,
possibly modifying some of its data, and sends it back to the switch with
an NXT_RESUME request, which causes flow table traversal to continue. In
principle, a single packet can be paused and resumed multiple times.
Another way to look at it is:
- "pause" is an extension of the existing OFPAT_CONTROLLER
action. It sends the packet to the controller, with full
pipeline context (some of which is switch implementation
dependent, and may thus vary from switch to switch).
- A continuation is an extension of OFPT_PACKET_IN, allowing for
implementation dependent metadata.
- NXT_RESUME is an extension of OFPT_PACKET_OUT, with the
semantics that the pipeline processing is continued with the
original translation context from where it was left at the time
it was paused.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2016-02-19 16:10:06 -08:00
|
|
|
|
enum ofperr
|
|
|
|
|
xlate_resume(struct ofproto_dpif *ofproto,
|
|
|
|
|
const struct ofputil_packet_in_private *pin,
|
|
|
|
|
struct ofpbuf *odp_actions,
|
|
|
|
|
enum slow_path_reason *slow)
|
|
|
|
|
{
|
|
|
|
|
struct dp_packet packet;
|
|
|
|
|
dp_packet_use_const(&packet, pin->public.packet,
|
|
|
|
|
pin->public.packet_len);
|
|
|
|
|
|
|
|
|
|
struct flow flow;
|
|
|
|
|
flow_extract(&packet, &flow);
|
|
|
|
|
|
|
|
|
|
struct xlate_in xin;
|
|
|
|
|
xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags),
|
|
|
|
|
&packet, NULL, odp_actions);
|
|
|
|
|
|
|
|
|
|
struct ofpact_note noop;
|
|
|
|
|
ofpact_init_NOTE(&noop);
|
|
|
|
|
noop.length = 0;
|
|
|
|
|
|
|
|
|
|
bool any_actions = pin->actions_len > 0;
|
|
|
|
|
struct frozen_state state = {
|
|
|
|
|
.table_id = 0, /* Not the table where NXAST_PAUSE was executed. */
|
|
|
|
|
.ofproto_uuid = pin->bridge,
|
|
|
|
|
.stack = pin->stack,
|
|
|
|
|
.n_stack = pin->n_stack,
|
|
|
|
|
.mirrors = pin->mirrors,
|
|
|
|
|
.conntracked = pin->conntracked,
|
|
|
|
|
|
|
|
|
|
/* When there are no actions, xlate_actions() will search the flow
|
|
|
|
|
* table. We don't want it to do that (we want it to resume), so
|
|
|
|
|
* supply a no-op action if there aren't any.
|
|
|
|
|
*
|
|
|
|
|
* (We can't necessarily avoid translating actions entirely if there
|
|
|
|
|
* aren't any actions, because there might be some finishing-up to do
|
|
|
|
|
* at the end of the pipeline, and we don't check for those
|
|
|
|
|
* conditions.) */
|
|
|
|
|
.ofpacts = any_actions ? pin->actions : &noop.ofpact,
|
|
|
|
|
.ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
|
|
|
|
|
|
|
|
|
|
.action_set = pin->action_set,
|
|
|
|
|
.action_set_len = pin->action_set_len,
|
|
|
|
|
};
|
|
|
|
|
frozen_metadata_from_flow(&state.metadata,
|
|
|
|
|
&pin->public.flow_metadata.flow);
|
|
|
|
|
xin.frozen_state = &state;
|
|
|
|
|
|
|
|
|
|
struct xlate_out xout;
|
|
|
|
|
enum xlate_error error = xlate_actions(&xin, &xout);
|
|
|
|
|
*slow = xout.slow;
|
|
|
|
|
xlate_out_uninit(&xout);
|
|
|
|
|
|
|
|
|
|
/* xlate_actions() can generate a number of errors, but only
|
|
|
|
|
* XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be
|
|
|
|
|
* sure to report over OpenFlow. The others could come up in packet-outs
|
|
|
|
|
* or regular flow translation and I don't think that it's going to be too
|
|
|
|
|
* useful to report them to the controller. */
|
|
|
|
|
return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-10-09 04:30:33 +00:00
|
|
|
|
/* Sends 'packet' out 'ofport'.
|
|
|
|
|
* May modify 'packet'.
|
|
|
|
|
* Returns 0 if successful, otherwise a positive errno value. */
|
|
|
|
|
int
|
2015-02-22 03:21:09 -08:00
|
|
|
|
xlate_send_packet(const struct ofport_dpif *ofport, struct dp_packet *packet)
|
2013-10-09 04:30:33 +00:00
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2013-10-09 04:30:33 +00:00
|
|
|
|
struct xport *xport;
|
|
|
|
|
struct ofpact_output output;
|
|
|
|
|
struct flow flow;
|
|
|
|
|
|
|
|
|
|
ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
|
|
|
|
|
/* Use OFPP_NONE as the in_port to avoid special packet processing. */
|
2015-02-22 03:21:09 -08:00
|
|
|
|
flow_extract(packet, &flow);
|
2014-02-26 18:08:04 -08:00
|
|
|
|
flow.in_port.ofp_port = OFPP_NONE;
|
2013-10-09 04:30:33 +00:00
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xport = xport_lookup(xcfg, ofport);
|
2013-10-09 04:30:33 +00:00
|
|
|
|
if (!xport) {
|
2013-10-11 14:17:13 -07:00
|
|
|
|
return EINVAL;
|
2013-10-09 04:30:33 +00:00
|
|
|
|
}
|
|
|
|
|
output.port = xport->ofp_port;
|
|
|
|
|
output.max_len = 0;
|
2014-01-15 10:06:40 -08:00
|
|
|
|
|
|
|
|
|
return ofproto_dpif_execute_actions(xport->xbridge->ofproto, &flow, NULL,
|
|
|
|
|
&output.ofpact, sizeof output,
|
|
|
|
|
packet);
|
2013-06-11 13:32:30 -07:00
|
|
|
|
}
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
|
|
|
|
struct xlate_cache *
|
|
|
|
|
xlate_cache_new(void)
|
|
|
|
|
{
|
|
|
|
|
struct xlate_cache *xcache = xmalloc(sizeof *xcache);
|
|
|
|
|
|
|
|
|
|
ofpbuf_init(&xcache->entries, 512);
|
|
|
|
|
return xcache;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct xc_entry *
|
|
|
|
|
xlate_cache_add_entry(struct xlate_cache *xcache, enum xc_type type)
|
|
|
|
|
{
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
|
|
|
|
|
entry = ofpbuf_put_zeros(&xcache->entries, sizeof *entry);
|
|
|
|
|
entry->type = type;
|
|
|
|
|
|
|
|
|
|
return entry;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_cache_netdev(struct xc_entry *entry, const struct dpif_flow_stats *stats)
|
|
|
|
|
{
|
|
|
|
|
if (entry->u.dev.tx) {
|
|
|
|
|
netdev_vport_inc_tx(entry->u.dev.tx, stats);
|
|
|
|
|
}
|
|
|
|
|
if (entry->u.dev.rx) {
|
|
|
|
|
netdev_vport_inc_rx(entry->u.dev.rx, stats);
|
|
|
|
|
}
|
|
|
|
|
if (entry->u.dev.bfd) {
|
|
|
|
|
bfd_account_rx(entry->u.dev.bfd, stats);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_cache_normal(struct ofproto_dpif *ofproto, struct flow *flow, int vlan)
|
|
|
|
|
{
|
2014-05-27 17:34:14 -07:00
|
|
|
|
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
struct xbridge *xbridge;
|
|
|
|
|
struct xbundle *xbundle;
|
|
|
|
|
struct flow_wildcards wc;
|
|
|
|
|
|
2014-05-27 17:34:14 -07:00
|
|
|
|
xbridge = xbridge_lookup(xcfg, ofproto);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
if (!xbridge) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xbundle = lookup_input_bundle(xbridge, flow->in_port.ofp_port, false,
|
|
|
|
|
NULL);
|
|
|
|
|
if (!xbundle) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
update_learning_table(xbridge, flow, &wc, vlan, xbundle);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Push stats and perform side effects of flow translation. */
|
|
|
|
|
void
|
2014-08-22 15:32:19 -07:00
|
|
|
|
xlate_push_stats(struct xlate_cache *xcache,
|
2014-04-10 16:00:28 +12:00
|
|
|
|
const struct dpif_flow_stats *stats)
|
|
|
|
|
{
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
struct ofpbuf entries = xcache->entries;
|
2015-08-28 14:55:11 -07:00
|
|
|
|
struct eth_addr dmac;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
|
2014-08-22 15:18:33 -07:00
|
|
|
|
if (!stats->n_packets) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2014-04-10 16:00:28 +12:00
|
|
|
|
XC_ENTRY_FOR_EACH (entry, entries, xcache) {
|
|
|
|
|
switch (entry->type) {
|
|
|
|
|
case XC_RULE:
|
|
|
|
|
rule_dpif_credit_stats(entry->u.rule, stats);
|
|
|
|
|
break;
|
|
|
|
|
case XC_BOND:
|
|
|
|
|
bond_account(entry->u.bond.bond, entry->u.bond.flow,
|
|
|
|
|
entry->u.bond.vid, stats->n_bytes);
|
|
|
|
|
break;
|
|
|
|
|
case XC_NETDEV:
|
|
|
|
|
xlate_cache_netdev(entry, stats);
|
|
|
|
|
break;
|
|
|
|
|
case XC_NETFLOW:
|
|
|
|
|
netflow_flow_update(entry->u.nf.netflow, entry->u.nf.flow,
|
|
|
|
|
entry->u.nf.iface, stats);
|
|
|
|
|
break;
|
|
|
|
|
case XC_MIRROR:
|
|
|
|
|
mirror_update_stats(entry->u.mirror.mbridge,
|
|
|
|
|
entry->u.mirror.mirrors,
|
|
|
|
|
stats->n_packets, stats->n_bytes);
|
|
|
|
|
break;
|
|
|
|
|
case XC_LEARN:
|
2014-08-22 15:32:19 -07:00
|
|
|
|
ofproto_dpif_flow_mod(entry->u.learn.ofproto, entry->u.learn.fm);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
break;
|
|
|
|
|
case XC_NORMAL:
|
2014-08-22 15:32:19 -07:00
|
|
|
|
xlate_cache_normal(entry->u.normal.ofproto, entry->u.normal.flow,
|
|
|
|
|
entry->u.normal.vlan);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
break;
|
|
|
|
|
case XC_FIN_TIMEOUT:
|
|
|
|
|
xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
|
|
|
|
|
entry->u.fin.idle, entry->u.fin.hard);
|
|
|
|
|
break;
|
2014-05-22 10:47:13 +00:00
|
|
|
|
case XC_GROUP:
|
|
|
|
|
group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
|
|
|
|
|
stats);
|
|
|
|
|
break;
|
2015-11-30 16:24:49 -02:00
|
|
|
|
case XC_TNL_NEIGH:
|
|
|
|
|
/* Lookup neighbor to avoid timeout. */
|
2015-11-25 11:31:12 -02:00
|
|
|
|
tnl_neigh_lookup(entry->u.tnl_neigh_cache.br_name,
|
|
|
|
|
&entry->u.tnl_neigh_cache.d_ipv6, &dmac);
|
2014-11-11 11:53:47 -08:00
|
|
|
|
break;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
default:
|
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
xlate_dev_unref(struct xc_entry *entry)
|
|
|
|
|
{
|
|
|
|
|
if (entry->u.dev.tx) {
|
|
|
|
|
netdev_close(entry->u.dev.tx);
|
|
|
|
|
}
|
|
|
|
|
if (entry->u.dev.rx) {
|
|
|
|
|
netdev_close(entry->u.dev.rx);
|
|
|
|
|
}
|
|
|
|
|
if (entry->u.dev.bfd) {
|
|
|
|
|
bfd_unref(entry->u.dev.bfd);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Tears down the state held by an XC_NETFLOW cache entry: clears 'flow' from
 * 'netflow', drops the reference on 'netflow', and frees 'flow' itself. */
static void
xlate_cache_clear_netflow(struct netflow *netflow, struct flow *flow)
{
    netflow_flow_clear(netflow, flow);
    netflow_unref(netflow);

    free(flow);
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
xlate_cache_clear(struct xlate_cache *xcache)
|
|
|
|
|
{
|
|
|
|
|
struct xc_entry *entry;
|
|
|
|
|
struct ofpbuf entries;
|
|
|
|
|
|
|
|
|
|
if (!xcache) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
XC_ENTRY_FOR_EACH (entry, entries, xcache) {
|
|
|
|
|
switch (entry->type) {
|
|
|
|
|
case XC_RULE:
|
|
|
|
|
rule_dpif_unref(entry->u.rule);
|
|
|
|
|
break;
|
|
|
|
|
case XC_BOND:
|
|
|
|
|
free(entry->u.bond.flow);
|
|
|
|
|
bond_unref(entry->u.bond.bond);
|
|
|
|
|
break;
|
|
|
|
|
case XC_NETDEV:
|
|
|
|
|
xlate_dev_unref(entry);
|
|
|
|
|
break;
|
|
|
|
|
case XC_NETFLOW:
|
|
|
|
|
xlate_cache_clear_netflow(entry->u.nf.netflow, entry->u.nf.flow);
|
|
|
|
|
break;
|
|
|
|
|
case XC_MIRROR:
|
|
|
|
|
mbridge_unref(entry->u.mirror.mbridge);
|
|
|
|
|
break;
|
|
|
|
|
case XC_LEARN:
|
ofproto-dpif-xlate: Cache full flowmod for learning.
Caching the results of xlate_learn was previously dependent on the state
of the 'may_learn' flag. This meant that if the caller did not specify
that this flow may learn, then a learn entry would not be cached.
However, the xlate_cache tends to be used on a recurring basis, so
failing to cache the learn entry can provide unexpected behaviour later
on, particularly in corner cases.
Such a corner case occurred previously:-
* Revalidation was requested.
* A flow with a learn action was dumped.
* The flow had no packets.
* The flow's corresponding xcache was cleared, and the flow revalidated.
* The flow went on to receive packets after the xcache is re-created.
In this case, the xcache would be re-created, but would not refresh the
timeouts on the learnt flow until the next time it was cleared, even if
it received more traffic. This would cause flows to time out sooner than
expected. Symptoms of this bug may include unexpected forwarding
behaviour or extraneous statistics being attributed to the wrong flow.
This patch fixes the issue by caching the entire flow_mod, including
actions, upon translating an xlate_learn action. This is used to perform
a flow_mod from scratch with the original flow, rather than simply
refreshing the rule that was created during the creation of the xcache.
Bug #1252997.
Reported-by: Scott Hendricks <shendricks@vmware.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2014-06-03 20:44:35 +12:00
|
|
|
|
free(entry->u.learn.fm);
|
|
|
|
|
ofpbuf_delete(entry->u.learn.ofpacts);
|
2014-04-10 16:00:28 +12:00
|
|
|
|
break;
|
|
|
|
|
case XC_NORMAL:
|
|
|
|
|
free(entry->u.normal.flow);
|
|
|
|
|
break;
|
|
|
|
|
case XC_FIN_TIMEOUT:
|
2014-04-24 08:21:49 -07:00
|
|
|
|
/* 'u.fin.rule' is always already held as a XC_RULE, which
|
|
|
|
|
* has already released it's reference above. */
|
2014-04-10 16:00:28 +12:00
|
|
|
|
break;
|
2014-05-22 10:47:13 +00:00
|
|
|
|
case XC_GROUP:
|
|
|
|
|
group_dpif_unref(entry->u.group.group);
|
|
|
|
|
break;
|
2015-11-30 16:24:49 -02:00
|
|
|
|
case XC_TNL_NEIGH:
|
2014-11-11 11:53:47 -08:00
|
|
|
|
break;
|
2014-04-10 16:00:28 +12:00
|
|
|
|
default:
|
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ofpbuf_clear(&xcache->entries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
xlate_cache_delete(struct xlate_cache *xcache)
|
|
|
|
|
{
|
|
|
|
|
xlate_cache_clear(xcache);
|
|
|
|
|
ofpbuf_uninit(&xcache->entries);
|
|
|
|
|
free(xcache);
|
|
|
|
|
}
|