2009-07-08 13:19:16 -07:00
|
|
|
/*
|
dpif-netdev: Add clone action
Add support for userspace datapath clone action. The clone action
provides an action envelope to enclose an action list.
For example, with actions A, B, C and D, and an action list:
A, clone(B, C), D
The clone action will ensure that:
- D will see the same packet, and any meta states, such as flow, as
action B.
- D will be executed regardless whether B, or C drops a packet. They
can only drop a clone.
- When B drops a packet, clone will skip all remaining actions
within the clone envelope. This feature is useful when we add
meter action later: The meter action can be implemented as a
simple action without its own envelope (unlike the sample action).
When necessary, the flow translation layer can enclose a meter action
in clone.
The clone action is very similar with the OpenFlow clone action.
This is by design to simplify vswitchd flow translation logic.
Without datapath clone, vswitchd simulate the effect by inserting
datapath actions to "undo" clone actions. The above flow will be
translated into A, B, C, -C, -B, D.
However, there are two issues:
- The resulting datapath action list may be longer without using
clone.
- Some actions, such as NAT may not be possible to reverse.
This patch implements clone() simply with packet copy. The performance
can be improved with later patches, for example, to delay or avoid
packet copy if possible. It seems datapath should have enough context
to carry out such optimization without the userspace context.
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
2017-01-10 18:13:47 -08:00
|
|
|
* Copyright (c) 2007-2017 Nicira, Inc.
|
2009-07-08 13:19:16 -07:00
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
* This file is offered under your choice of two licenses: Apache 2.0 or GNU
|
|
|
|
* GPL 2.0 or later. The permission statements for each of these licenses is
|
|
|
|
* given below. You may license your modifications to this file under either
|
|
|
|
* of these licenses or both. If you wish to license your modifications under
|
|
|
|
* only one of these licenses, delete the permission text for the other
|
|
|
|
* license.
|
2009-07-08 13:19:16 -07:00
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
* ----------------------------------------------------------------------
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
* ----------------------------------------------------------------------
|
2011-11-16 13:39:40 -08:00
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
2009-06-15 15:11:30 -07:00
|
|
|
*
|
2011-11-16 13:39:40 -08:00
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
2009-06-15 15:11:30 -07:00
|
|
|
*
|
2011-11-16 13:39:40 -08:00
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
* 02110-1301, USA
|
2009-06-15 15:11:30 -07:00
|
|
|
* ----------------------------------------------------------------------
|
2009-07-08 13:19:16 -07:00
|
|
|
*/
|
|
|
|
|
2011-10-05 10:50:58 -07:00
|
|
|
#ifndef _LINUX_OPENVSWITCH_H
|
|
|
|
#define _LINUX_OPENVSWITCH_H 1
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
#include <linux/types.h>
|
2013-03-29 18:30:34 -07:00
|
|
|
#include <linux/if_ether.h>
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatibility. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
/**
|
|
|
|
* struct ovs_header - header for OVS Generic Netlink messages.
|
|
|
|
* @dp_ifindex: ifindex of local port for datapath (0 to make a request not
|
|
|
|
* specific to a datapath).
|
|
|
|
*
|
|
|
|
* Attributes following the header are specific to a particular OVS Generic
|
|
|
|
* Netlink family, but all of the OVS families use this header.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct ovs_header {
|
|
|
|
int dp_ifindex;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Datapaths. */
|
2011-01-28 13:55:04 -08:00
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
#define OVS_DATAPATH_FAMILY "ovs_datapath"
|
|
|
|
#define OVS_DATAPATH_MCGROUP "ovs_datapath"
|
2013-12-03 20:56:22 -08:00
|
|
|
|
|
|
|
/* V2:
|
|
|
|
* - API users are expected to provide OVS_DP_ATTR_USER_FEATURES
|
|
|
|
* when creating the datapath.
|
|
|
|
*/
|
|
|
|
#define OVS_DATAPATH_VERSION 2
|
|
|
|
|
|
|
|
/* First OVS datapath version to support features */
|
|
|
|
#define OVS_DP_VER_FEATURES 2
|
2011-08-18 10:35:40 -07:00
|
|
|
|
|
|
|
enum ovs_datapath_cmd {
|
|
|
|
OVS_DP_CMD_UNSPEC,
|
|
|
|
OVS_DP_CMD_NEW,
|
|
|
|
OVS_DP_CMD_DEL,
|
|
|
|
OVS_DP_CMD_GET,
|
|
|
|
OVS_DP_CMD_SET
|
2011-01-28 13:55:04 -08:00
|
|
|
};
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2011-01-26 15:42:00 -08:00
|
|
|
/**
|
2011-08-18 10:35:40 -07:00
|
|
|
* enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
|
|
|
|
* @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
|
2011-01-21 17:01:56 -08:00
|
|
|
* port". This is the name of the network device whose dp_ifindex is given in
|
2011-08-18 10:35:40 -07:00
|
|
|
* the &struct ovs_header. Always present in notifications. Required in
|
|
|
|
* %OVS_DP_CMD_NEW requests. May be used as an alternative to specifying
|
2011-01-21 17:01:56 -08:00
|
|
|
* dp_ifindex in other requests (with a dp_ifindex of 0).
|
2011-09-14 13:05:09 -07:00
|
|
|
* @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
|
|
|
|
* set on the datapath port (for %OVS_PACKET_CMD_MISS). Only valid on
|
|
|
|
* %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
|
|
|
|
* not be sent.
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
* @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
|
|
|
|
* OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
|
2011-08-18 10:35:40 -07:00
|
|
|
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
|
2011-01-28 13:55:04 -08:00
|
|
|
* datapath. Always present in notifications.
|
2013-10-21 14:37:33 -07:00
|
|
|
* @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
|
|
|
|
* datapath. Always present in notifications.
|
2011-01-26 15:42:00 -08:00
|
|
|
*
|
2011-08-18 10:35:40 -07:00
|
|
|
* These attributes follow the &struct ovs_header within the Generic Netlink
|
|
|
|
* payload for %OVS_DP_* commands.
|
2011-01-26 15:42:00 -08:00
|
|
|
*/
|
2011-08-18 10:35:40 -07:00
|
|
|
enum ovs_datapath_attr {
|
|
|
|
OVS_DP_ATTR_UNSPEC,
|
2013-10-21 14:37:33 -07:00
|
|
|
OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
|
|
|
|
OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
|
|
|
|
OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
|
|
|
|
OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
|
2013-12-03 20:47:24 -08:00
|
|
|
OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */
|
2016-07-17 09:52:10 -07:00
|
|
|
OVS_DP_ATTR_PAD,
|
2021-09-06 10:53:58 +02:00
|
|
|
OVS_DP_ATTR_MASKS_CACHE_SIZE,
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls */
|
2011-08-18 10:35:40 -07:00
|
|
|
__OVS_DP_ATTR_MAX
|
2011-01-26 15:42:00 -08:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
|
2011-01-26 15:42:00 -08:00
|
|
|
|
2014-04-28 17:31:25 -07:00
|
|
|
/* All 64-bit integers within Netlink messages are 4-byte aligned only. */
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_dp_stats {
|
2011-10-22 18:50:44 -07:00
|
|
|
__u64 n_hit; /* Number of flow table matches. */
|
|
|
|
__u64 n_missed; /* Number of flow table misses. */
|
|
|
|
__u64 n_lost; /* Number of misses not sent to userspace. */
|
|
|
|
__u64 n_flows; /* Number of flows present */
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
|
|
|
|
2013-10-21 14:37:33 -07:00
|
|
|
/* Megaflow mask statistics, carried in OVS_DP_ATTR_MEGAFLOW_STATS. */
struct ovs_dp_megaflow_stats {
|
|
|
|
__u64 n_mask_hit; /* Number of masks used for flow lookups. */
|
|
|
|
__u32 n_masks; /* Number of masks for the datapath. */
|
|
|
|
__u32 pad0; /* Pad for future expansion. */
|
2021-09-06 10:53:42 +02:00
|
|
|
__u64 n_cache_hit; /* Number of cache matches for flow lookups. */
|
2013-10-21 14:37:33 -07:00
|
|
|
__u64 pad1; /* Pad for future expansion. */
|
|
|
|
};
|
|
|
|
|
2011-09-15 19:36:17 -07:00
|
|
|
struct ovs_vport_stats {
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatibility. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__u64 rx_packets; /* total packets received */
|
|
|
|
__u64 tx_packets; /* total packets transmitted */
|
|
|
|
__u64 rx_bytes; /* total bytes received */
|
|
|
|
__u64 tx_bytes; /* total bytes transmitted */
|
|
|
|
__u64 rx_errors; /* bad packets received */
|
|
|
|
__u64 tx_errors; /* packet transmit problems */
|
|
|
|
__u64 rx_dropped; /* no space in linux buffers */
|
|
|
|
__u64 tx_dropped; /* no space available in linux */
|
2011-09-15 19:36:17 -07:00
|
|
|
};
|
|
|
|
|
2013-12-03 20:47:24 -08:00
|
|
|
/* Allow last Netlink attribute to be unaligned */
|
|
|
|
#define OVS_DP_F_UNALIGNED (1 << 0)
|
|
|
|
|
2014-04-14 23:37:10 -07:00
|
|
|
/* Allow datapath to associate multiple Netlink PIDs to each vport */
|
|
|
|
#define OVS_DP_F_VPORT_PIDS (1 << 1)
|
|
|
|
|
2019-12-22 12:16:38 +02:00
|
|
|
/* Allow tc offload recirc sharing */
|
|
|
|
#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
|
|
|
|
|
dpif-netlink: Introduce per-cpu upcall dispatch.
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.
This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:
* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)
This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.
In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:
a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.
Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-07-16 06:17:36 -04:00
|
|
|
/* Allow per-cpu dispatch of upcalls */
|
|
|
|
#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
|
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
/* Fixed logical ports. */
|
2013-01-07 12:45:27 +02:00
|
|
|
#define OVSP_LOCAL ((__u32)0)
|
2011-10-22 18:50:44 -07:00
|
|
|
|
|
|
|
/* Packet transfer. */
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
#define OVS_PACKET_FAMILY "ovs_packet"
|
2011-10-22 18:22:18 -07:00
|
|
|
#define OVS_PACKET_VERSION 0x1
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
enum ovs_packet_cmd {
|
2011-10-22 18:50:44 -07:00
|
|
|
OVS_PACKET_CMD_UNSPEC,
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
/* Kernel-to-user notifications. */
|
|
|
|
OVS_PACKET_CMD_MISS, /* Flow table miss. */
|
|
|
|
OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
|
2011-01-26 13:41:54 -08:00
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
/* Userspace commands. */
|
|
|
|
OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */
|
2011-01-26 13:41:54 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
2011-08-18 10:35:40 -07:00
|
|
|
* enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
|
|
|
|
* @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
|
2011-01-26 13:41:54 -08:00
|
|
|
* packet as received, from the start of the Ethernet header onward. For
|
2011-08-18 10:35:40 -07:00
|
|
|
* %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
|
|
|
|
* actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
|
2011-01-23 21:56:00 -08:00
|
|
|
* the flow key extracted from the packet as originally received.
|
2011-08-18 10:35:40 -07:00
|
|
|
* @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
|
|
|
|
* extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
|
2011-01-26 13:41:54 -08:00
|
|
|
* userspace to adapt its flow setup strategy by comparing its notion of the
|
2014-04-28 17:31:26 -07:00
|
|
|
* flow key against the kernel's. When used with %OVS_PACKET_CMD_EXECUTE, only
|
|
|
|
* metadata key fields (e.g. priority, skb mark) are honored. All the packet
|
|
|
|
* header fields are parsed from the packet instead.
|
2011-09-28 10:43:07 -07:00
|
|
|
* @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
|
|
|
|
* for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
|
2015-06-11 09:43:58 -07:00
|
|
|
* Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional
|
|
|
|
* %OVS_USERSPACE_ATTR_ACTIONS attribute.
|
2011-08-18 10:35:40 -07:00
|
|
|
* @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
|
2011-10-12 16:24:54 -07:00
|
|
|
* notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
|
2013-02-15 16:48:32 -08:00
|
|
|
* %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
|
|
|
|
* specified there.
|
2014-08-17 20:19:36 -07:00
|
|
|
* @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION
|
|
|
|
* notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
|
|
|
|
* %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
|
|
|
|
* output port is actually a tunnel port. Contains the output tunnel key
|
|
|
|
* extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
|
2015-02-26 15:52:34 -08:00
|
|
|
* @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and
|
|
|
|
* %OVS_ACTION_ATTR_USERSPACE action; specifies the maximum received fragment
|
|
|
|
* size.
|
2016-06-24 15:50:58 -07:00
|
|
|
* @OVS_PACKET_ATTR_LEN: Packet size before truncation.
|
2015-02-26 15:52:34 -08:00
|
|
|
*
|
2011-08-18 10:35:40 -07:00
|
|
|
* These attributes follow the &struct ovs_header within the Generic Netlink
|
|
|
|
* payload for %OVS_PACKET_* commands.
|
2011-01-26 13:41:54 -08:00
|
|
|
*/
|
2011-08-18 10:35:40 -07:00
|
|
|
enum ovs_packet_attr {
|
|
|
|
OVS_PACKET_ATTR_UNSPEC,
|
|
|
|
OVS_PACKET_ATTR_PACKET, /* Packet data. */
|
|
|
|
OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
|
2011-09-28 10:43:07 -07:00
|
|
|
OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
|
2013-02-15 16:48:32 -08:00
|
|
|
OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
|
2014-08-17 20:19:36 -07:00
|
|
|
OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_*
|
|
|
|
attributes. */
|
2015-01-15 00:17:31 +01:00
|
|
|
OVS_PACKET_ATTR_UNUSED1,
|
|
|
|
OVS_PACKET_ATTR_UNUSED2,
|
|
|
|
OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe,
|
|
|
|
error logging should be suppressed. */
|
2015-02-26 15:52:34 -08:00
|
|
|
OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */
|
2016-06-24 15:50:58 -07:00
|
|
|
OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */
|
ofproto-dpif-upcall: Echo HASH attribute back to datapath.
The kernel datapath may sent upcall with hash info,
ovs-vswitchd should get it from upcall and then send
it back.
The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-11-15 10:58:59 +08:00
|
|
|
OVS_PACKET_ATTR_HASH, /* Packet hash. */
|
2011-08-18 10:35:40 -07:00
|
|
|
__OVS_PACKET_ATTR_MAX
|
2010-01-04 13:08:37 -08:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
|
2011-01-23 20:01:30 -08:00
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
/* Virtual ports. */
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
#define OVS_VPORT_FAMILY "ovs_vport"
|
|
|
|
#define OVS_VPORT_MCGROUP "ovs_vport"
|
2011-10-22 18:22:18 -07:00
|
|
|
#define OVS_VPORT_VERSION 0x1
|
2011-08-18 10:35:40 -07:00
|
|
|
|
|
|
|
enum ovs_vport_cmd {
|
|
|
|
OVS_VPORT_CMD_UNSPEC,
|
|
|
|
OVS_VPORT_CMD_NEW,
|
|
|
|
OVS_VPORT_CMD_DEL,
|
|
|
|
OVS_VPORT_CMD_GET,
|
|
|
|
OVS_VPORT_CMD_SET
|
2011-01-28 13:59:03 -08:00
|
|
|
};
|
2011-01-23 20:01:30 -08:00
|
|
|
|
2011-10-22 18:50:44 -07:00
|
|
|
enum ovs_vport_type {
|
|
|
|
OVS_VPORT_TYPE_UNSPEC,
|
|
|
|
OVS_VPORT_TYPE_NETDEV, /* network device */
|
|
|
|
OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
|
2014-11-08 07:24:42 -08:00
|
|
|
OVS_VPORT_TYPE_GRE, /* GRE tunnel. */
|
|
|
|
OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */
|
|
|
|
OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */
|
2024-12-04 21:45:11 +01:00
|
|
|
OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel (deprecated). */
|
2024-12-04 21:45:12 +01:00
|
|
|
OVS_VPORT_TYPE_STT = 106, /* STT tunnel (deprecated). */
|
2018-03-05 10:11:57 -08:00
|
|
|
OVS_VPORT_TYPE_ERSPAN = 107, /* ERSPAN tunnel. */
|
|
|
|
OVS_VPORT_TYPE_IP6ERSPAN = 108, /* ERSPAN tunnel. */
|
|
|
|
OVS_VPORT_TYPE_IP6GRE = 109,
|
2019-11-25 11:19:23 -08:00
|
|
|
OVS_VPORT_TYPE_GTPU = 110,
|
2020-12-17 12:48:41 +05:30
|
|
|
OVS_VPORT_TYPE_BAREUDP = 111, /* Bareudp tunnel. */
|
2023-03-29 14:51:17 +09:00
|
|
|
OVS_VPORT_TYPE_SRV6 = 112, /* SRv6 tunnel. */
|
2011-10-22 18:50:44 -07:00
|
|
|
__OVS_VPORT_TYPE_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
|
|
|
|
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
/**
|
2011-08-18 10:35:40 -07:00
|
|
|
* enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
|
|
|
|
* @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
|
|
|
|
* @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
|
2011-01-28 13:59:03 -08:00
|
|
|
* of vport.
|
2011-08-18 10:35:40 -07:00
|
|
|
* @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
|
2011-01-28 13:59:03 -08:00
|
|
|
* this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
|
|
|
|
* plus a null terminator.
|
2011-11-05 20:13:55 -07:00
|
|
|
* @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
|
2014-04-14 23:37:10 -07:00
|
|
|
* @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
|
|
|
|
* among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
|
|
|
|
* received on this port. If this is a single-element array of value 0,
|
|
|
|
* upcalls should not be sent.
|
2011-09-15 19:36:17 -07:00
|
|
|
* @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
|
2011-01-28 13:59:03 -08:00
|
|
|
* packets sent or received through the vport.
|
|
|
|
*
|
2011-08-18 10:35:40 -07:00
|
|
|
* These attributes follow the &struct ovs_header within the Generic Netlink
|
|
|
|
* payload for %OVS_VPORT_* commands.
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
*
|
2011-08-18 10:35:40 -07:00
|
|
|
* For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
|
|
|
|
* %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
|
2011-01-28 13:59:03 -08:00
|
|
|
* optional; if not specified a free port number is automatically selected.
|
2011-08-18 10:35:40 -07:00
|
|
|
* Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
|
2013-01-25 13:42:01 -08:00
|
|
|
* of vport. %OVS_VPORT_ATTR_STATS is optional and other attributes are
|
|
|
|
* ignored.
|
2011-01-28 13:59:03 -08:00
|
|
|
*
|
2011-08-18 10:35:40 -07:00
|
|
|
* For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
|
2011-01-28 13:59:03 -08:00
|
|
|
* look up the vport to operate on; otherwise dp_ifindex from the &struct
|
2011-08-18 10:35:40 -07:00
|
|
|
* ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
|
datapath: Change listing ports to use an iterator concept.
One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other. To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software. In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed. Neither choice is very attractive.
This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call. It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.
It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.
The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-10 13:12:12 -08:00
|
|
|
*/
|
2011-08-18 10:35:40 -07:00
|
|
|
enum ovs_vport_attr {
	OVS_VPORT_ATTR_UNSPEC,
	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
	OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
				/* receiving upcalls */
	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
	OVS_VPORT_ATTR_PAD,
	OVS_VPORT_ATTR_IFINDEX,
	OVS_VPORT_ATTR_NETNSID,
	OVS_VPORT_ATTR_UPCALL_STATS,
	__OVS_VPORT_ATTR_MAX	/* Sentinel: one past the last attribute. */
};

/* Highest valid enum ovs_vport_attr value. */
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
|
2011-01-26 12:28:59 -08:00
|
|
|
|
2023-01-18 20:31:17 -05:00
|
|
|
/**
 * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands
 * @OVS_VPORT_UPCALL_ATTR_SUCCESS: 64-bit upcall success packets.
 * @OVS_VPORT_UPCALL_ATTR_FAIL: 64-bit upcall fail packets.
 * @__OVS_VPORT_UPCALL_ATTR_MAX: Sentinel, one past the last attribute.
 */
enum ovs_vport_upcall_attr {
	OVS_VPORT_UPCALL_ATTR_SUCCESS,
	OVS_VPORT_UPCALL_ATTR_FAIL,
	__OVS_VPORT_UPCALL_ATTR_MAX,
};

/* Highest valid enum ovs_vport_upcall_attr value. */
#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1)
|
|
|
|
|
2015-02-06 21:10:44 +01:00
|
|
|
/* VXLAN extension attribute types.  The numbering is not contiguous:
 * OVS_VXLAN_EXT_GPE is pinned to 8, leaving a gap after OVS_VXLAN_EXT_GBP. */
enum {
	OVS_VXLAN_EXT_UNSPEC,
	OVS_VXLAN_EXT_GBP,
	/* place new values here to fill gap. */
	OVS_VXLAN_EXT_GPE = 8,
	__OVS_VXLAN_EXT_MAX,	/* Sentinel: one past OVS_VXLAN_EXT_GPE. */
};

/* Highest valid OVS_VXLAN_EXT_* value. */
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
|
|
|
|
|
2020-12-17 12:48:41 +05:30
|
|
|
/* Bare-UDP extension attribute types. */
enum {
	OVS_BAREUDP_EXT_UNSPEC,
	OVS_BAREUDP_EXT_MULTIPROTO_MODE,
	__OVS_BAREUDP_EXT_MAX,	/* Sentinel: one past the last value. */
};

/* Highest valid OVS_BAREUDP_EXT_* value. */
#define OVS_BAREUDP_EXT_MAX (__OVS_BAREUDP_EXT_MAX - 1)
|
|
|
|
|
2013-03-04 13:00:25 -08:00
|
|
|
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
 */
enum {
	OVS_TUNNEL_ATTR_UNSPEC,
	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
	OVS_TUNNEL_ATTR_EXTENSION,
	__OVS_TUNNEL_ATTR_MAX	/* Sentinel: one past the last attribute. */
};

/* Highest valid OVS_TUNNEL_ATTR_* value. */
#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)
|
|
|
|
|
2011-01-28 14:00:51 -08:00
|
|
|
/* Flows. */

/* The family and multicast-group names are the same string, "ovs_flow". */
#define OVS_FLOW_FAMILY  "ovs_flow"
#define OVS_FLOW_MCGROUP "ovs_flow"
#define OVS_FLOW_VERSION 0x1
|
2011-01-28 14:00:51 -08:00
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* Command codes for the flow family; values are assigned sequentially
 * starting from OVS_FLOW_CMD_UNSPEC == 0. */
enum ovs_flow_cmd {
	OVS_FLOW_CMD_UNSPEC,
	OVS_FLOW_CMD_NEW,
	OVS_FLOW_CMD_DEL,
	OVS_FLOW_CMD_GET,
	OVS_FLOW_CMD_SET
};
|
2011-01-26 12:28:59 -08:00
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_flow_stats {
|
2011-10-22 18:50:44 -07:00
|
|
|
__u64 n_packets; /* Number of matched packets. */
|
|
|
|
__u64 n_bytes; /* Number of matched bytes. */
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
|
|
|
|
2011-10-05 09:59:51 -07:00
|
|
|
/* Flow key attribute types.  The comment on each enumerator gives the payload
 * carried by that attribute.  Values are assigned sequentially from
 * OVS_KEY_ATTR_UNSPEC == 0, so the order below must not change. */
enum ovs_key_attr {
	OVS_KEY_ATTR_UNSPEC,
	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
	OVS_KEY_ATTR_PRIORITY,	/* u32 skb->priority */
	OVS_KEY_ATTR_IN_PORT,	/* u32 OVS dp port number */
	OVS_KEY_ATTR_ETHERNET,	/* struct ovs_key_ethernet */
	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
	OVS_KEY_ATTR_IPV4,	/* struct ovs_key_ipv4 */
	OVS_KEY_ATTR_IPV6,	/* struct ovs_key_ipv6 */
	OVS_KEY_ATTR_TCP,	/* struct ovs_key_tcp */
	OVS_KEY_ATTR_UDP,	/* struct ovs_key_udp */
	OVS_KEY_ATTR_ICMP,	/* struct ovs_key_icmp */
	OVS_KEY_ATTR_ICMPV6,	/* struct ovs_key_icmpv6 */
	OVS_KEY_ATTR_ARP,	/* struct ovs_key_arp */
	OVS_KEY_ATTR_ND,	/* struct ovs_key_nd */
	OVS_KEY_ATTR_SKB_MARK,	/* u32 skb mark */
	OVS_KEY_ATTR_TUNNEL,	/* Nested set of ovs_tunnel attributes */
	OVS_KEY_ATTR_SCTP,	/* struct ovs_key_sctp */
	OVS_KEY_ATTR_TCP_FLAGS,	/* be16 TCP flags. */
	OVS_KEY_ATTR_DP_HASH,	/* u32 hash value. Value 0 indicates the hash
				   is not computed by the datapath. */
	OVS_KEY_ATTR_RECIRC_ID,	/* u32 recirc id */
	OVS_KEY_ATTR_MPLS,	/* array of struct ovs_key_mpls.
				 * The implementation may restrict
				 * the accepted length of the array. */
	OVS_KEY_ATTR_CT_STATE,	/* u32 bitmask of OVS_CS_F_* */
	OVS_KEY_ATTR_CT_ZONE,	/* u16 connection tracking zone. */
	OVS_KEY_ATTR_CT_MARK,	/* u32 connection tracking mark */
	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking labels */
	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
	OVS_KEY_ATTR_NSH,	/* Nested set of ovs_nsh_key_* */

	/* User space decided to squat on types 29 and 30.  They are defined
	 * below, but should not be sent to the kernel.
	 *
	 * WARNING: No new types should be added unless they are defined
	 *          for both kernel and user space (no 'ifdef's).  It's hard
	 *          to keep compatibility otherwise.
	 */
	OVS_KEY_ATTR_PACKET_TYPE,	/* be32 packet type */
	OVS_KEY_ATTR_ND_EXTENSIONS,	/* struct ovs_key_nd_extensions */

	OVS_KEY_ATTR_TUNNEL_INFO,	/* struct ip_tunnel_info.
					 * For in-kernel use only.
					 */
	__OVS_KEY_ATTR_MAX	/* Sentinel: one past the last attribute. */
};

/* Highest valid enum ovs_key_attr value. */
#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
|
2011-01-23 18:44:44 -08:00
|
|
|
|
2013-01-18 18:10:59 -08:00
|
|
|
/* Tunnel key attribute types.  The comment on each enumerator gives its
 * payload.  OVS_TUNNEL_KEY_ATTR_GTPU_OPTS exists only when __KERNEL__ is not
 * defined, so __OVS_TUNNEL_KEY_ATTR_MAX (and therefore
 * OVS_TUNNEL_KEY_ATTR_MAX) is one larger in userspace builds than in
 * kernel builds. */
enum ovs_tunnel_key_attr {
	OVS_TUNNEL_KEY_ATTR_ID,			/* be64 Tunnel ID */
	OVS_TUNNEL_KEY_ATTR_IPV4_SRC,		/* be32 src IP address. */
	OVS_TUNNEL_KEY_ATTR_IPV4_DST,		/* be32 dst IP address. */
	OVS_TUNNEL_KEY_ATTR_TOS,		/* u8 Tunnel IP ToS. */
	OVS_TUNNEL_KEY_ATTR_TTL,		/* u8 Tunnel IP TTL. */
	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,	/* No argument, set DF. */
	OVS_TUNNEL_KEY_ATTR_CSUM,		/* No argument. CSUM packet. */
	OVS_TUNNEL_KEY_ATTR_OAM,		/* No argument. OAM frame. */
	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,	/* Array of Geneve options. */
	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,		/* Nested OVS_VXLAN_EXT_* */
	OVS_TUNNEL_KEY_ATTR_IPV6_SRC,		/* struct in6_addr src IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_IPV6_DST,		/* struct in6_addr dst IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_PAD,
	OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,	/* struct erspan_metadata */
#ifndef __KERNEL__
	/* Only used within userspace data path. */
	OVS_TUNNEL_KEY_ATTR_GTPU_OPTS,		/* struct gtpu_metadata */
#endif
	__OVS_TUNNEL_KEY_ATTR_MAX	/* Sentinel: one past the last attribute. */
};

/* Highest valid enum ovs_tunnel_key_attr value for this build. */
#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
|
|
|
|
|
2019-12-18 05:48:12 +01:00
|
|
|
/**
 * enum xlate_error - Different types of error during translation
 *
 * Only declared when __KERNEL__ is not defined.  XLATE_OK is explicitly 0;
 * XLATE_MAX is the sentinel that follows the last error code.
 */

#ifndef __KERNEL__
enum xlate_error {
	XLATE_OK = 0,
	XLATE_BRIDGE_NOT_FOUND,
	XLATE_RECURSION_TOO_DEEP,
	XLATE_TOO_MANY_RESUBMITS,
	XLATE_STACK_TOO_DEEP,
	XLATE_NO_RECIRCULATION_CONTEXT,
	XLATE_RECIRCULATION_CONFLICT,
	XLATE_TOO_MANY_MPLS_LABELS,
	XLATE_INVALID_TUNNEL_METADATA,
	XLATE_UNSUPPORTED_PACKET_TYPE,
	XLATE_CONGESTION_DROP,
	XLATE_FORWARDING_DISABLED,
	XLATE_MAX,
};
#endif
|
|
|
|
|
Implement new fragment handling policy.
Until now, OVS has handled IP fragments more awkwardly than necessary. It
has not been possible to match on L4 headers, even in fragments with offset
0 where they are actually present. This means that there was no way to
implement ACLs that treat, say, different TCP ports differently, on
fragmented traffic; instead, all decisions for fragment forwarding had to
be made on the basis of L2 and L3 headers alone.
This commit improves the situation significantly. It is still not possible
to match on L4 headers in fragments with nonzero offset, because that
information is simply not present in such fragments, but this commit adds
the ability to match on L4 headers for fragments with zero offset. This
means that it becomes possible to implement ACLs that drop such "first
fragments" on the basis of L4 headers. In practice, that effectively
blocks even fragmented traffic on an L4 basis, because the receiving IP
stack cannot reassemble a full packet when the first fragment is missing.
This commit works by adding a new "fragment type" to the kernel flow match
and making it available through OpenFlow as a new NXM field named
NXM_NX_IP_FRAG. Because OpenFlow 1.0 explicitly says that the L4 fields
are always 0 for IP fragments, it adds a new OpenFlow fragment handling
mode that fills in the L4 fields for "first fragments". It also enhances
ovs-ofctl to allow users to configure this new fragment handling mode and
to parse the new field.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Bug #7557.
2011-10-19 21:33:44 -07:00
|
|
|
/**
 * enum ovs_frag_type - IPv4 and IPv6 fragment type
 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
 * @__OVS_FRAG_TYPE_MAX: Sentinel, one past the last fragment type.
 *
 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag in &struct
 * ovs_key_ipv6.
 */
enum ovs_frag_type {
	OVS_FRAG_TYPE_NONE,
	OVS_FRAG_TYPE_FIRST,
	OVS_FRAG_TYPE_LATER,
	__OVS_FRAG_TYPE_MAX
};

/* Highest valid enum ovs_frag_type value. */
#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_key_ethernet {
|
2013-03-29 18:30:34 -07:00
|
|
|
__u8 eth_src[ETH_ALEN];
|
|
|
|
__u8 eth_dst[ETH_ALEN];
|
2011-01-23 18:44:44 -08:00
|
|
|
};
|
|
|
|
|
2013-01-25 16:22:07 +09:00
|
|
|
struct ovs_key_mpls {
|
2013-05-02 10:49:49 +09:00
|
|
|
__be32 mpls_lse;
|
2013-01-25 16:22:07 +09:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_key_ipv4 {
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatiblity. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__be32 ipv4_src;
|
|
|
|
__be32 ipv4_dst;
|
|
|
|
__u8 ipv4_proto;
|
|
|
|
__u8 ipv4_tos;
|
2011-11-05 15:48:12 -07:00
|
|
|
__u8 ipv4_ttl;
|
Implement new fragment handling policy.
Until now, OVS has handled IP fragments more awkwardly than necessary. It
has not been possible to match on L4 headers, even in fragments with offset
0 where they are actually present. This means that there was no way to
implement ACLs that treat, say, different TCP ports differently, on
fragmented traffic; instead, all decisions for fragment forwarding had to
be made on the basis of L2 and L3 headers alone.
This commit improves the situation significantly. It is still not possible
to match on L4 headers in fragments with nonzero offset, because that
information is simply not present in such fragments, but this commit adds
the ability to match on L4 headers for fragments with zero offset. This
means that it becomes possible to implement ACLs that drop such "first
fragments" on the basis of L4 headers. In practice, that effectively
blocks even fragmented traffic on an L4 basis, because the receiving IP
stack cannot reassemble a full packet when the first fragment is missing.
This commit works by adding a new "fragment type" to the kernel flow match
and making it available through OpenFlow as a new NXM field named
NXM_NX_IP_FRAG. Because OpenFlow 1.0 explicitly says that the L4 fields
are always 0 for IP fragments, it adds a new OpenFlow fragment handling
mode that fills in the L4 fields for "first fragments". It also enhances
ovs-ofctl to allow users to configure this new fragment handling mode and
to parse the new field.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Bug #7557.
2011-10-19 21:33:44 -07:00
|
|
|
__u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
|
2011-01-23 18:44:44 -08:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_key_ipv6 {
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatiblity. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__be32 ipv6_src[4];
|
|
|
|
__be32 ipv6_dst[4];
|
2011-11-01 15:57:56 -07:00
|
|
|
__be32 ipv6_label; /* 20-bits in least-significant bits. */
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatiblity. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__u8 ipv6_proto;
|
2011-11-09 12:17:38 -08:00
|
|
|
__u8 ipv6_tclass;
|
2011-11-05 15:48:12 -07:00
|
|
|
__u8 ipv6_hlimit;
|
Implement new fragment handling policy.
Until now, OVS has handled IP fragments more awkwardly than necessary. It
has not been possible to match on L4 headers, even in fragments with offset
0 where they are actually present. This means that there was no way to
implement ACLs that treat, say, different TCP ports differently, on
fragmented traffic; instead, all decisions for fragment forwarding had to
be made on the basis of L2 and L3 headers alone.
This commit improves the situation significantly. It is still not possible
to match on L4 headers in fragments with nonzero offset, because that
information is simply not present in such fragments, but this commit adds
the ability to match on L4 headers for fragments with zero offset. This
means that it becomes possible to implement ACLs that drop such "first
fragments" on the basis of L4 headers. In practice, that effectively
blocks even fragmented traffic on an L4 basis, because the receiving IP
stack cannot reassemble a full packet when the first fragment is missing.
This commit works by adding a new "fragment type" to the kernel flow match
and making it available through OpenFlow as a new NXM field named
NXM_NX_IP_FRAG. Because OpenFlow 1.0 explicitly says that the L4 fields
are always 0 for IP fragments, it adds a new OpenFlow fragment handling
mode that fills in the L4 fields for "first fragments". It also enhances
ovs-ofctl to allow users to configure this new fragment handling mode and
to parse the new field.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Bug #7557.
2011-10-19 21:33:44 -07:00
|
|
|
__u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
|
2010-12-29 19:03:46 -08:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_key_tcp {
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatiblity. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__be16 tcp_src;
|
|
|
|
__be16 tcp_dst;
|
2011-01-23 18:44:44 -08:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
struct ovs_key_udp {
|
datapath-protocol: Use Linux kernel types directly.
We want datapath-protocol.h to be acceptable as a Linux kernel header, so
it must use Linux kernel types and must not have references to Open vSwitch
symbols or header files. This commit primarily makes that change to
datapath-protocol.h.
At the same time, at least for now we also want datapath-protocol.h to be
usable on non-Linux platforms, so we need some kind of compatiblity. Thus,
this commit also introduces a <linux/types.h> header file that defines the
necessary Linux kernel types on non-Linux platforms.
In turn, this requires openvswitch/types.h to use the Linux types directly
for ovs_be<N>; otherwise, sparse complains because now __be<N> and
ovs_be<N> are incompatible from its perspective, so this commit makes that
change too.
I don't have a non-Linux kernel platform readily available, so I only
tested the non-Linux part of the linux/types.h substitute by forcing that
case to be triggered with #if 0. It worked, except for errors in actual
Linux kernel headers included explicitly from OVS source files, so I think
it's likely to work in practice.
Bug #7559.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
2011-10-05 10:42:34 -07:00
|
|
|
__be16 udp_src;
|
|
|
|
__be16 udp_dst;
|
2011-01-23 18:44:44 -08:00
|
|
|
};
|
|
|
|
|
2013-08-22 20:24:43 +12:00
|
|
|
struct ovs_key_sctp {
|
|
|
|
__be16 sctp_src;
|
|
|
|
__be16 sctp_dst;
|
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* ICMP portion of a flow key: one byte of type followed by one byte of
 * code. */
struct ovs_key_icmp {
	__u8 icmp_type;
	__u8 icmp_code;
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* ICMPv6 portion of a flow key: one byte of type followed by one byte of
 * code. */
struct ovs_key_icmpv6 {
	__u8 icmpv6_type;
	__u8 icmpv6_code;
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* ARP portion of a flow key.
 *
 * arp_sip/arp_tip are 32-bit big-endian values and arp_op is a 16-bit
 * big-endian value; arp_sha/arp_tha are ETH_ALEN-byte arrays.
 * NOTE(review): presumably sender/target IPv4 addresses, the ARP opcode and
 * sender/target hardware addresses respectively -- confirm against users. */
struct ovs_key_arp {
	__be32 arp_sip;
	__be32 arp_tip;
	__be16 arp_op;
	__u8   arp_sha[ETH_ALEN];
	__u8   arp_tha[ETH_ALEN];
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* Neighbor Discovery portion of a flow key.
 *
 * nd_target is four 32-bit big-endian words (128 bits in total); nd_sll and
 * nd_tll are ETH_ALEN-byte arrays.
 * NOTE(review): presumably the ND target IPv6 address and the source/target
 * link-layer address options -- confirm against users. */
struct ovs_key_nd {
	__be32 nd_target[4];
	__u8   nd_sll[ETH_ALEN];
	__u8   nd_tll[ETH_ALEN];
};
|
|
|
|
|
2019-01-28 11:41:06 +00:00
|
|
|
#ifndef __KERNEL__
/* Additional Neighbor Discovery key fields.  Only defined for non-kernel
 * builds (the definition is compiled out when __KERNEL__ is set). */
struct ovs_key_nd_extensions {
	__be32 nd_reserved;
	__u8   nd_options_type;
};
#endif
|
|
|
|
|
2017-03-08 17:18:22 -08:00
|
|
|
/* Connection tracking labels.
 *
 * The label is OVS_CT_LABELS_LEN bytes long (OVS_CT_LABELS_LEN_32 32-bit
 * words, i.e. 128 bits).  The anonymous union lets the same storage be
 * accessed either bytewise ('ct_labels') or as 32-bit words
 * ('ct_labels_32'). */
#define OVS_CT_LABELS_LEN_32	4
#define OVS_CT_LABELS_LEN	(OVS_CT_LABELS_LEN_32 * sizeof(__u32))
struct ovs_key_ct_labels {
	union {
		__u8	ct_labels[OVS_CT_LABELS_LEN];
		__u32	ct_labels_32[OVS_CT_LABELS_LEN_32];
	};
};
|
|
|
|
|
2018-01-06 13:47:51 +08:00
|
|
|
/* Attribute types for the NSH portion of a flow key.
 *
 * __OVS_NSH_KEY_ATTR_MAX is a sentinel that must stay last;
 * OVS_NSH_KEY_ATTR_MAX is derived from it and equals the highest real
 * attribute value. */
enum ovs_nsh_key_attr {
	OVS_NSH_KEY_ATTR_UNSPEC,
	OVS_NSH_KEY_ATTR_BASE,  /* struct ovs_nsh_key_base. */
	OVS_NSH_KEY_ATTR_MD1,   /* struct ovs_nsh_key_md1. */
	OVS_NSH_KEY_ATTR_MD2,   /* variable-length octets. */
	__OVS_NSH_KEY_ATTR_MAX
};

#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Payload of OVS_NSH_KEY_ATTR_BASE: four single-byte fields followed by one
 * 32-bit big-endian word.
 * NOTE(review): presumably 'np' is the NSH next-protocol and 'path_hdr' the
 * service path header (SPI + SI) -- confirm against the NSH parser. */
struct ovs_nsh_key_base {
	__u8   flags;
	__u8   ttl;
	__u8   mdtype;
	__u8   np;
	__be32 path_hdr;
};
|
|
|
|
|
|
|
|
/* Number of 32-bit context words carried by struct ovs_nsh_key_md1. */
#define NSH_MD1_CONTEXT_SIZE 4

/* Payload of OVS_NSH_KEY_ATTR_MD1: NSH_MD1_CONTEXT_SIZE 32-bit big-endian
 * context words. */
struct ovs_nsh_key_md1 {
	__be32 context[NSH_MD1_CONTEXT_SIZE];
};
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
/* OVS_KEY_ATTR_CT_STATE flags.
 *
 * Each flag is a distinct single bit, so they may be OR'd together.
 * OVS_CS_F_NAT_MASK selects both NAT bits at once. */
#define OVS_CS_F_NEW               0x01 /* Beginning of a new connection. */
#define OVS_CS_F_ESTABLISHED       0x02 /* Part of an existing connection. */
#define OVS_CS_F_RELATED           0x04 /* Related to an established
					 * connection. */
#define OVS_CS_F_REPLY_DIR         0x08 /* Flow is in the reply direction. */
#define OVS_CS_F_INVALID           0x10 /* Could not track connection. */
#define OVS_CS_F_TRACKED           0x20 /* Conntrack has occurred. */
#define OVS_CS_F_SRC_NAT           0x40 /* Packet's source address/port was
					 * mangled by NAT. */
#define OVS_CS_F_DST_NAT           0x80 /* Packet's destination address/port
					 * was mangled by NAT. */

#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
|
datapath: Add original direction conntrack tuple to sw_flow_key.
Upstream commit:
commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc
Author: Jarno Rajahalme <jarno@ovn.org>
Date: Thu Feb 9 11:21:59 2017 -0800
openvswitch: Add original direction conntrack tuple to sw_flow_key.
Add the fields of the conntrack original direction 5-tuple to struct
sw_flow_key. The new fields are initially marked as non-existent, and
are populated whenever a conntrack action is executed and either finds
or generates a conntrack entry. This means that these fields exist
for all packets that were not rejected by conntrack as untrackable.
The original tuple fields in the sw_flow_key are filled from the
original direction tuple of the conntrack entry relating to the
current packet, or from the original direction tuple of the master
conntrack entry, if the current conntrack entry has a master.
Generally, expected connections of connections having an assigned
helper (e.g., FTP), have a master conntrack entry.
The main purpose of the new conntrack original tuple fields is to
allow matching on them for policy decision purposes, with the premise
that the admissibility of tracked connections reply packets (as well
as original direction packets), and both direction packets of any
related connections may be based on ACL rules applying to the master
connection's original direction 5-tuple. This also makes it easier to
make policy decisions when the actual packet headers might have been
transformed by NAT, as the original direction 5-tuple represents the
packet headers before any such transformation.
When using the original direction 5-tuple the admissibility of return
and/or related packets need not be based on the mere existence of a
conntrack entry, allowing separation of admission policy from the
established conntrack state. While existence of a conntrack entry is
required for admission of the return or related packets, policy
changes can render connections that were initially admitted to be
rejected or dropped afterwards. If the admission of the return and
related packets was based on mere conntrack state (e.g., connection
being in an established state), a policy change that would make the
connection rejected or dropped would need to find and delete all
conntrack entries affected by such a change. When using the original
direction 5-tuple matching the affected conntrack entries can be
allowed to time out instead, as the established state of the
connection would not need to be the basis for packet admission any
more.
It should be noted that the directionality of related connections may
be the same or different than that of the master connection, and
neither the original direction 5-tuple nor the conntrack state bits
carry this information. If needed, the directionality of the master
connection can be stored in master's conntrack mark or labels, which
are automatically inherited by the expected related connections.
The fact that neither ARP nor ND packets are trackable by conntrack
allows mutual exclusion between ARP/ND and the new conntrack original
tuple fields. Hence, the IP addresses are overlaid in union with ARP
and ND fields. This allows the sw_flow_key to not grow much due to
this patch, but it also means that we must be careful to never use the
new key fields with ARP or ND packets. ARP is easy to distinguish and
keep mutually exclusive based on the ethernet type, but ND being an
ICMPv6 protocol requires a bit more attention.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This patch squashes in minimal amount of OVS userspace code to not
break the build. Later patches contain the full userspace support.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
2017-03-08 17:18:22 -08:00
|
|
|
/* Conntrack tuple for IPv4: two 32-bit big-endian addresses, two 16-bit
 * big-endian ports and a one-byte protocol number.
 * NOTE(review): presumably the conntrack original-direction 5-tuple --
 * confirm against the code that fills it in. */
struct ovs_key_ct_tuple_ipv4 {
	__be32 ipv4_src;
	__be32 ipv4_dst;
	__be16 src_port;
	__be16 dst_port;
	__u8   ipv4_proto;
};
|
|
|
|
|
|
|
|
/* Conntrack tuple for IPv6: two 128-bit addresses (four 32-bit big-endian
 * words each), two 16-bit big-endian ports and a one-byte protocol number.
 * NOTE(review): presumably the conntrack original-direction 5-tuple --
 * confirm against the code that fills it in. */
struct ovs_key_ct_tuple_ipv6 {
	__be32 ipv6_src[4];
	__be32 ipv6_dst[4];
	__be16 src_port;
	__be16 dst_port;
	__u8   ipv6_proto;
};
|
|
|
|
|
2011-01-26 15:42:00 -08:00
|
|
|
/**
 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
 * key.  Always present in notifications.  Required for all requests (except
 * dumps).
 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
 * the actions to take for packets that match the key.  Always present in
 * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
 * %OVS_FLOW_CMD_SET requests.  An %OVS_FLOW_CMD_SET without
 * %OVS_FLOW_ATTR_ACTIONS will not modify the actions.  To clear the actions,
 * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given.
 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
 * flow.  Present in notifications if the stats would be nonzero.  Ignored in
 * requests.
 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
 * TCP flags seen on packets in this flow.  Only present in notifications for
 * TCP flows, and only if it would be nonzero.  Ignored in requests.
 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
 * the system monotonic clock, at which a packet was last processed for this
 * flow.  Only present in notifications if a packet has been processed for this
 * flow.  Ignored in requests.
 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
 * last-used time, accumulated TCP flags, and statistics for this flow.
 * Otherwise ignored in requests.  Never present in notifications.
 * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
 * mask bits for wildcarded flow match.  Mask bit value '1' specifies exact
 * match with corresponding flow key bit, while mask bit value '0' specifies
 * a wildcarded match.  Omitting attribute is treated as wildcarding all
 * corresponding fields.  Optional for all requests.  If not present,
 * all flow key bits are exact match bits.
 * @OVS_FLOW_ATTR_PROBE: Marks the flow operation as a feature probe, for
 * which error logging should be suppressed.
 * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique
 * identifier for the flow.  Causes the flow to be indexed by this value rather
 * than the value of the %OVS_FLOW_ATTR_KEY attribute.  Optional for all
 * requests.  Present in notifications if the flow was created with this
 * attribute.
 * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_*
 * flags that provide alternative semantics for flow installation and
 * retrieval.  Optional for all requests.
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_FLOW_* commands.
 */
enum ovs_flow_attr {
	OVS_FLOW_ATTR_UNSPEC,
	OVS_FLOW_ATTR_KEY,       /* Sequence of OVS_KEY_ATTR_* attributes. */
	OVS_FLOW_ATTR_ACTIONS,   /* Nested OVS_ACTION_ATTR_* attributes. */
	OVS_FLOW_ATTR_STATS,     /* struct ovs_flow_stats. */
	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
	OVS_FLOW_ATTR_MASK,      /* Sequence of OVS_KEY_ATTR_* attributes. */
	OVS_FLOW_ATTR_PROBE,     /* Flow operation is a feature probe, error
				  * logging should be suppressed. */
	OVS_FLOW_ATTR_UFID,      /* Variable length unique flow identifier. */
	OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */
	OVS_FLOW_ATTR_PAD,       /* NOTE(review): undocumented here; presumably
				  * used only for attribute padding --
				  * confirm. */
	__OVS_FLOW_ATTR_MAX      /* Sentinel; must stay last. */
};

#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2014-11-21 16:49:40 -08:00
|
|
|
/**
 * Omit attributes for notifications.
 *
 * If a datapath request contains an OVS_UFID_F_OMIT_* flag, then the datapath
 * may omit the corresponding 'ovs_flow_attr' from the response.
 *
 * Each flag is a distinct single bit, so they may be OR'd together.
 */
#define OVS_UFID_F_OMIT_KEY      (1 << 0)
#define OVS_UFID_F_OMIT_MASK     (1 << 1)
#define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
|
|
|
|
|
2011-09-28 10:43:07 -07:00
|
|
|
/**
 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
 * %OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
 * %UINT32_MAX samples all packets and intermediate values sample intermediate
 * fractions of packets.
 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
 * Actions are passed as nested attributes.
 *
 * Executes the specified actions with the given probability on a per-packet
 * basis.
 */
enum ovs_sample_attr {
	OVS_SAMPLE_ATTR_UNSPEC,
	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
	__OVS_SAMPLE_ATTR_MAX,

	/* Declared after the __OVS_SAMPLE_ATTR_MAX sentinel and only in
	 * kernel builds, so OVS_SAMPLE_ATTR_MAX does not count it. */
#ifdef __KERNEL__
	OVS_SAMPLE_ATTR_ARG          /* struct sample_arg  */
#endif
};

#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
|
|
|
|
|
datapath: openvswitch: Optimize sample action for the clone use cases
Upstream commit:
openvswitch: Optimize sample action for the clone use cases
With the introduction of open flow 'clone' action, the OVS user space
can now translate the 'clone' action into kernel datapath 'sample'
action, with 100% probability, to ensure that the clone semantics,
which is that the packet seen by the clone action is the same as the
packet seen by the action after clone, is faithfully carried out
in the datapath.
While the sample action in the datapath has the matching semantics,
its implementation is only optimized for its original use.
Specifically, there are two limitations: First, there is a 3 level of
nesting restriction, enforced at the flow downloading time. This
limit turns out to be too restrictive for the 'clone' use case.
Second, the implementation avoid recursive call only if the sample
action list has a single userspace action.
The main optimization implemented in this series removes the static
nesting limit check, instead, implement the run time recursion limit
check, and recursion avoidance similar to that of the 'recirc' action.
This optimization solve both #1 and #2 issues above.
One related optimization attempts to avoid copying flow key as
long as the actions enclosed does not change the flow key. The
detection is performed only once at the flow downloading time.
Another related optimization is to rewrite the action list
at flow downloading time in order to save the fast path from parsing
the sample action list in its original form repeatedly.
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Upstream: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases")
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
2017-04-06 13:05:48 -07:00
|
|
|
#ifdef __KERNEL__
|
|
|
|
/*
 * struct sample_arg - argument record for the sample action; declared only
 * when __KERNEL__ is defined.
 * @exec: true when the actions nested in the sample do not change flow keys,
 *        false otherwise.
 * @probability: same value as OVS_SAMPLE_ATTR_PROBABILITY.
 */
struct sample_arg {
|
|
|
|
bool exec; /* When true, actions in sample will not
|
|
|
|
* change flow keys. False otherwise.
|
|
|
|
*/
|
|
|
|
u32 probability; /* Same value as
|
|
|
|
* 'OVS_SAMPLE_ATTR_PROBABILITY'.
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2011-10-12 16:24:54 -07:00
|
|
|
/**
|
|
|
|
* enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
|
|
|
|
* @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
|
|
|
|
* message should be sent. Required.
|
2013-02-15 16:48:32 -08:00
|
|
|
* @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
|
|
|
|
* copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
|
2014-08-17 20:19:36 -07:00
|
|
|
* @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
|
|
|
|
* tunnel info.
|
2015-06-11 09:43:58 -07:00
|
|
|
* @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall.
|
2011-10-12 16:24:54 -07:00
|
|
|
*/
|
|
|
|
enum ovs_userspace_attr {
|
|
|
|
/* Placeholder; as the first enumerator its value is 0. */
OVS_USERSPACE_ATTR_UNSPEC,
|
|
|
|
OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
|
2013-02-15 16:48:32 -08:00
|
|
|
OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
|
2014-08-17 20:19:36 -07:00
|
|
|
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port
|
|
|
|
* to get tunnel info. */
|
2015-06-11 09:43:58 -07:00
|
|
|
OVS_USERSPACE_ATTR_ACTIONS, /* Optional flag to get actions. */
|
2011-10-12 16:24:54 -07:00
|
|
|
/* Sentinel: one more than the last real attribute. OVS_USERSPACE_ATTR_MAX
 * is derived from it by subtracting 1, so new attributes must be added
 * above this line. */
__OVS_USERSPACE_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
|
|
|
|
|
2016-06-24 07:42:29 -07:00
|
|
|
/*
 * struct ovs_action_trunc - argument carrying a maximum packet length.
 * @max_len: maximum packet size in bytes.
 *
 * NOTE(review): presumably the argument of the truncate action
 * (OVS_ACTION_ATTR_TRUNC); the action enum is not visible here -- confirm.
 */
struct ovs_action_trunc {
|
|
|
|
uint32_t max_len; /* Max packet size in bytes. */
|
|
|
|
};
|
|
|
|
|
2013-01-25 16:22:07 +09:00
|
|
|
/**
|
|
|
|
* struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
|
|
|
|
* @mpls_lse: MPLS label stack entry to push.
|
|
|
|
* @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
|
|
|
|
*
|
|
|
|
* The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
|
|
|
|
* %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Others are rejected.
|
|
|
|
*/
|
|
|
|
struct ovs_action_push_mpls {
|
|
|
|
__be32 mpls_lse;
|
|
|
|
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
|
|
|
|
};
|
|
|
|
|
2021-11-29 11:52:05 +05:30
|
|
|
/**
|
|
|
|
* struct ovs_action_add_mpls - %OVS_ACTION_ATTR_ADD_MPLS action
|
|
|
|
* argument.
|
|
|
|
* @mpls_lse: MPLS label stack entry to push.
|
|
|
|
* @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
|
|
|
|
* @tun_flags: MPLS tunnel attributes.
|
|
|
|
*
|
|
|
|
* The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
|
|
|
|
* %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Others are rejected.
|
|
|
|
*/
|
|
|
|
struct ovs_action_add_mpls {
|
|
|
|
__be32 mpls_lse; /* MPLS label stack entry to push. */
|
|
|
|
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
|
|
|
|
/* MPLS tunnel attributes. Bit 0 is OVS_MPLS_L3_TUNNEL_FLAG_MASK, which
 * selects whether the header goes at the start of the packet (clear) or
 * at the start of the l3 header (set). */
__u16 tun_flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_MPLS_L3_TUNNEL_FLAG_MASK (1 << 0) /* Flag to specify the place of
|
|
|
|
* insertion of MPLS header.
|
|
|
|
* When false, the MPLS header
|
|
|
|
* will be inserted at the start
|
|
|
|
* of the packet.
|
|
|
|
* When true, the MPLS header
|
|
|
|
* will be inserted at the start
|
|
|
|
* of the l3 header.
|
|
|
|
*/
|
|
|
|
|
2011-11-14 15:56:43 -08:00
|
|
|
/**
|
|
|
|
* struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
|
|
|
|
* @vlan_tpid: Tag protocol identifier (TPID) to push.
|
2011-11-14 17:19:41 -08:00
|
|
|
* @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
|
|
|
|
* (but it will not be set in the 802.1Q header that is pushed).
|
2011-11-14 15:56:43 -08:00
|
|
|
*
|
2017-02-13 10:39:13 +08:00
|
|
|
* The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
|
|
|
|
* The only acceptable TPID values are those that the kernel module also parses
|
|
|
|
* as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
|
|
|
|
* by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
|
2011-11-14 15:56:43 -08:00
|
|
|
*/
|
|
|
|
struct ovs_action_push_vlan {
|
2017-02-13 10:39:13 +08:00
|
|
|
__be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */
|
2011-11-14 15:56:43 -08:00
|
|
|
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
|
|
|
|
};
|
|
|
|
|
2014-03-04 15:36:03 -08:00
|
|
|
/* Data path hash algorithm for computing Datapath hash.
|
|
|
|
*
|
2014-04-08 18:42:39 -07:00
|
|
|
* The algorithm type only specifies the fields in a flow
|
2014-03-04 15:36:03 -08:00
|
|
|
* will be used as part of the hash. Each datapath is free
|
|
|
|
* to use its own hash algorithm. The hash value will be
|
|
|
|
* opaque to the user space daemon.
|
|
|
|
*/
|
2014-04-08 18:42:39 -07:00
|
|
|
enum ovs_hash_alg {
|
|
|
|
OVS_HASH_ALG_L4,
|
2018-05-24 17:27:59 +02:00
|
|
|
/* The next enumerator exists only when __KERNEL__ is not defined, so the
 * numeric value of __OVS_HASH_MAX below differs between builds with and
 * without __KERNEL__. */
#ifndef __KERNEL__
|
|
|
|
OVS_HASH_ALG_SYM_L4,
|
|
|
|
#endif
|
|
|
|
/* Sentinel: number of algorithms defined above in this build. */
__OVS_HASH_MAX
|
2014-03-04 15:36:03 -08:00
|
|
|
};
|
2014-11-08 07:24:42 -08:00
|
|
|
|
2014-03-04 15:36:03 -08:00
|
|
|
/*
|
2014-04-08 18:42:39 -07:00
|
|
|
* struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
|
2014-03-04 15:36:03 -08:00
|
|
|
* @hash_alg: Algorithm used to compute hash prior to recirculation.
|
2014-04-17 20:06:58 -07:00
|
|
|
* @hash_basis: basis used for computing hash.
|
2014-03-04 15:36:03 -08:00
|
|
|
*/
|
2014-04-08 18:42:39 -07:00
|
|
|
struct ovs_action_hash {
|
2014-11-08 07:24:42 -08:00
|
|
|
uint32_t hash_alg; /* One of ovs_hash_alg. */
|
2014-04-17 20:06:58 -07:00
|
|
|
uint32_t hash_basis;
|
2014-03-04 15:36:03 -08:00
|
|
|
};
|
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
#ifndef __KERNEL__
|
2015-06-22 14:23:37 -07:00
|
|
|
#define TNL_PUSH_HEADER_SIZE 512
|
2014-11-11 11:53:47 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* struct ovs_action_push_tnl - %OVS_ACTION_ATTR_TUNNEL_PUSH
|
|
|
|
* @tnl_port: To identify tunnel port to pass header info.
|
|
|
|
* @out_port: Physical port to send encapsulated packet.
|
|
|
|
* @header_len: Length of the header to be pushed.
|
|
|
|
* @tnl_type: This is only required to format this header. Otherwise
|
|
|
|
* ODP layer cannot parse @header.
|
|
|
|
* @header: Partial header for the tunnel. Tunnel push action can use
|
|
|
|
* this header to build final header according to actual packet parameters.
|
|
|
|
*/
|
|
|
|
struct ovs_action_push_tnl {
|
2017-06-18 08:59:30 +08:00
|
|
|
odp_port_t tnl_port; /* Tunnel port, used to pass header info. */
|
|
|
|
odp_port_t out_port; /* Physical port to send encapsulated packet. */
|
2014-11-11 11:53:47 -08:00
|
|
|
uint32_t header_len; /* Length of the header to be pushed. */
|
|
|
|
uint32_t tnl_type; /* For logging. */
|
packets: Remove unnecessary "packed" annotations.
I know of two reasons to mark a structure as "packed". The first is
because the structure must match some defined interface and therefore
compiler-inserted padding between or after members would cause its layout
to diverge from that interface. This is not a problem in a structure that
follows the general alignment rules that are seen in ABIs for all the
architectures that OVS cares about: basically, that a struct member needs
to be aligned on a boundary that is a multiple of the member's size.
The second reason is because instances of the struct tend to be at
misaligned addresses.
struct eth_header and struct vlan_eth_header are normally aligned on
16-bit boundaries (at least), and they contain only 16-bit members, so
there's no need to pack them. This commit removes the packed annotation.
This commit also removes the packed annotation from struct llc_header.
Since that struct only contains 8-bit members, I don't know of any benefit
to packing it, period.
This commit also removes a few more packed annotations that are much less
important.
When these packed annotations were removed, it caused a few warnings
related to casts from 'uint8_t *' to more strictly aligned pointer types,
related to struct ovs_action_push_tnl. That's because that struct had a
trailing member used to store packet headers, that was declared as
a uint8_t[]. Before, when this was cast to 'struct eth_header *', there
was no change in alignment since eth_header was packed; now that
eth_header is not packed, the compiler considers it suspicious. This
commit avoids that problem by changing the member from uint8_t[] to
uint32_t[], which assures the compiler that it is properly aligned.
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
2017-05-30 08:22:03 -07:00
|
|
|
/* Partial tunnel header, TNL_PUSH_HEADER_SIZE bytes in total. Declared as
 * uint32_t words rather than bytes so the compiler treats the buffer as
 * suitably aligned when it is cast to header struct pointers. */
uint32_t header[TNL_PUSH_HEADER_SIZE / 4];
|
2014-11-11 11:53:47 -08:00
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
/**
|
|
|
|
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
|
|
|
|
* @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
|
|
|
|
* table. This allows future packets for the same connection to be identified
|
2015-12-02 23:53:54 -08:00
|
|
|
* as 'established' or 'related'. The flow key for the current packet will
|
|
|
|
* retain the pre-commit connection state.
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
|
|
|
|
* mask, the corresponding bit in the value is copied to the connection
|
|
|
|
* tracking mark field in the connection.
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
* @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
|
|
|
|
* mask. For each bit set in the mask, the corresponding bit in the value is
|
|
|
|
* copied to the connection tracking label field in the connection.
|
Add support for connection tracking helper/ALGs.
This patch adds support for specifying a "helper" or ALG to assist
connection tracking for protocols that consist of multiple streams.
Initially, only support for FTP is included.
Below is an example set of flows to allow FTP control connections from
port 1->2 to establish active data connections in the reverse direction:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(alg=ftp,commit),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(table=1)
table=1,in_port=2,tcp,ct_state=+trk+est,action=1
table=1,in_port=2,tcp,ct_state=+trk+rel,action=ct(commit),1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-15 14:29:16 -07:00
|
|
|
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
|
2017-03-08 17:18:23 -08:00
|
|
|
* @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address
|
|
|
|
* translation (NAT) on the packet.
|
2017-03-08 17:18:23 -08:00
|
|
|
* @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but instead of doing
|
|
|
|
* nothing if the connection is already committed will check that the current
|
|
|
|
* packet is in conntrack entry's original direction. If directionality does
|
|
|
|
* not match, will delete the existing conntrack entry and create a new one.
|
2017-04-27 10:34:42 -07:00
|
|
|
* @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types
|
|
|
|
* (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to
|
|
|
|
* zero, the corresponding event type is not generated. Default behavior
|
|
|
|
* depends on system configuration, but typically all event types are
|
|
|
|
* generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot
|
|
|
|
* of events. Explicitly passing this attribute allows limiting the updates
|
|
|
|
* received to the events of interest. The bit 1 << IPCT_NEW, 1 <<
|
|
|
|
* IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to
|
|
|
|
* be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups,
|
|
|
|
* respectively. Remaining bits control the changes for which an event is
|
|
|
|
* delivered on the NFNLGRP_CONNTRACK_UPDATE group.
|
2019-08-28 15:14:28 -07:00
|
|
|
* @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout.
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
*/
|
|
|
|
enum ovs_ct_attr {
|
|
|
|
OVS_CT_ATTR_UNSPEC,
|
|
|
|
OVS_CT_ATTR_COMMIT, /* No argument, commits connection. */
|
|
|
|
OVS_CT_ATTR_ZONE, /* u16 zone id. */
|
Add connection tracking mark support.
This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-18 13:58:00 -07:00
|
|
|
OVS_CT_ATTR_MARK, /* mark to associate with this connection. */
|
Add connection tracking label support.
This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.
For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-10-13 11:13:10 -07:00
|
|
|
OVS_CT_ATTR_LABELS, /* label to associate with this connection. */
|
Add support for connection tracking helper/ALGs.
This patch adds support for specifying a "helper" or ALG to assist
connection tracking for protocols that consist of multiple streams.
Initially, only support for FTP is included.
Below is an example set of flows to allow FTP control connections from
port 1->2 to establish active data connections in the reverse direction:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,action=ct(alg=ftp,commit),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(table=1)
table=1,in_port=2,tcp,ct_state=+trk+est,action=1
table=1,in_port=2,tcp,ct_state=+trk+rel,action=ct(commit),1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-09-15 14:29:16 -07:00
|
|
|
OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of
|
|
|
|
related connections. */
|
2015-11-24 15:47:56 -08:00
|
|
|
OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
|
2017-03-08 17:18:23 -08:00
|
|
|
OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
|
2017-04-27 10:34:42 -07:00
|
|
|
OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */
|
2019-08-28 15:14:28 -07:00
|
|
|
OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for
|
|
|
|
* fine-grain timeout tuning. */
|
|
|
|
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
__OVS_CT_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
|
|
|
|
|
2017-02-06 21:04:41 +08:00
|
|
|
/*
|
|
|
|
* struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
|
|
|
|
* @addresses: Source and destination MAC addresses.
|
|
|
|
*/
|
|
|
|
struct ovs_action_push_eth {
|
|
|
|
struct ovs_key_ethernet addresses; /* Source and destination MAC addresses. */
|
|
|
|
};
|
|
|
|
|
2015-11-24 15:47:56 -08:00
|
|
|
/**
|
|
|
|
* enum ovs_nat_attr - Attributes for %OVS_CT_ATTR_NAT.
|
|
|
|
*
|
|
|
|
* @OVS_NAT_ATTR_SRC: Flag for Source NAT (mangle source address/port).
|
|
|
|
* @OVS_NAT_ATTR_DST: Flag for Destination NAT (mangle destination
|
|
|
|
* address/port). Only one of (@OVS_NAT_ATTR_SRC, @OVS_NAT_ATTR_DST) may be
|
|
|
|
* specified. Effective only for packets for ct_state NEW connections.
|
|
|
|
* Committed connections are mangled by the NAT action according to the
|
|
|
|
* committed NAT type regardless of the flags specified. As a corollary, a NAT
|
|
|
|
* action without a NAT type flag will only mangle packets of committed
|
|
|
|
* connections. The following NAT attributes only apply for NEW connections,
|
|
|
|
* and they may be included only when the CT action has the @OVS_CT_ATTR_COMMIT
|
|
|
|
* flag and either @OVS_NAT_ATTR_SRC or @OVS_NAT_ATTR_DST is also included.
|
|
|
|
* @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
|
|
|
|
* @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
|
|
|
|
* @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
|
|
|
|
* @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
|
|
|
|
* @OVS_NAT_ATTR_PERSISTENT: Flag for persistent IP mapping across reboots
|
|
|
|
* @OVS_NAT_ATTR_PROTO_HASH: Flag for pseudo random L4 port mapping (MD5)
|
|
|
|
* @OVS_NAT_ATTR_PROTO_RANDOM: Flag for fully randomized L4 port mapping
|
|
|
|
*/
|
|
|
|
enum ovs_nat_attr {
|
|
|
|
OVS_NAT_ATTR_UNSPEC,
|
|
|
|
OVS_NAT_ATTR_SRC, /* Flag: source NAT. */
|
|
|
|
OVS_NAT_ATTR_DST, /* Flag: destination NAT. */
|
|
|
|
OVS_NAT_ATTR_IP_MIN, /* struct in_addr or struct in6_addr. */
|
|
|
|
OVS_NAT_ATTR_IP_MAX, /* struct in_addr or struct in6_addr. */
|
|
|
|
OVS_NAT_ATTR_PROTO_MIN, /* u16 L4 protocol specific lower boundary (port). */
|
|
|
|
OVS_NAT_ATTR_PROTO_MAX, /* u16 L4 protocol specific upper boundary (port). */
|
|
|
|
OVS_NAT_ATTR_PERSISTENT, /* Flag: persistent IP mapping across reboots. */
|
|
|
|
OVS_NAT_ATTR_PROTO_HASH, /* Flag: pseudo random L4 port mapping (MD5). */
|
|
|
|
OVS_NAT_ATTR_PROTO_RANDOM, /* Flag: fully randomized L4 port mapping. */
|
|
|
|
__OVS_NAT_ATTR_MAX, /* Sentinel: one more than the last attribute. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Highest valid OVS_NAT_ATTR_* value. */
#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
|
|
|
|
|
Add a new OVS action check_pkt_larger
This patch adds a new action 'check_pkt_larger' which checks if the
packet is larger than the given size and stores the result in the
destination register.
Usage: check_pkt_larger(len)->REGISTER
Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next;
This patch makes use of the new datapath action - 'check_pkt_len'
which was recently added in the commit [1].
At the start of ovs-vswitchd, datapath is probed for this action.
If the datapath action is present, then 'check_pkt_larger'
makes use of this datapath action.
Datapath action 'check_pkt_len' takes these nlattrs
* OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions
to apply if the packet length is greater than the specified 'pkt_len'
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested
actions to apply if the packet length is lesser or equal to the
specified 'pkt_len'.
Let's say we have these flows added to an OVS bridge br-int
table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1)
table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3
table=1, priority=100,in_port=1,ip,actions=output:4
Then the action 'check_pkt_larger' will be translated as
- check_pkt_len(size=100,gt(3),le(4))
datapath will check the packet length and if the packet length is greater than 100,
it will output to port 3, else it will output to port 4.
In case, datapath doesn't support 'check_pkt_len' action, the OVS action
'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added.
This OVS action is intended to be used by OVN to check the packet length
and generate an ICMP packet with type 3, code 4 and next hop mtu
in the logical router pipeline if the MTU of the physical interface
is lesser than the packet length. More information can be found here [2]
[1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9
[2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
CC: Ben Pfaff <blp@ovn.org>
CC: Gregory Rose <gvrose8192@gmail.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
|
|
|
/*
|
|
|
|
* enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN.
|
|
|
|
*
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for.
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested actions to apply if the
|
|
|
|
* packet length is greater than @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested actions to apply if
|
|
|
|
* the packet length is less than or equal to @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
|
|
|
|
*/
|
|
|
|
enum ovs_check_pkt_len_attr {
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_UNSPEC,
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, /* u16 packet length to check for. */
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, /* Nested actions, applied when the packet is longer. */
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, /* Nested actions, applied when it is not longer. */
|
|
|
|
__OVS_CHECK_PKT_LEN_ATTR_MAX, /* Sentinel: one more than the last public attribute. */
|
2019-04-23 00:53:43 +05:30
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */
|
|
|
|
#endif
|
Add a new OVS action check_pkt_larger
This patch adds a new action 'check_pkt_larger' which checks if the
packet is larger than the given size and stores the result in the
destination register.
Usage: check_pkt_larger(len)->REGISTER
Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next;
This patch makes use of the new datapath action - 'check_pkt_len'
which was recently added in the commit [1].
At the start of ovs-vswitchd, datapath is probed for this action.
If the datapath action is present, then 'check_pkt_larger'
makes use of this datapath action.
Datapath action 'check_pkt_len' takes these nlattrs
* OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions
to apply if the packet length is greater than the specified 'pkt_len'
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested
actions to apply if the packet length is lesser or equal to the
specified 'pkt_len'.
Let's say we have these flows added to an OVS bridge br-int
table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1)
table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3
table=1, priority=100,in_port=1,ip,actions=output:4
Then the action 'check_pkt_larger' will be translated as
- check_pkt_len(size=100,gt(3),le(4))
datapath will check the packet length and if the packet length is greater than 100,
it will output to port 3, else it will output to port 4.
In case, datapath doesn't support 'check_pkt_len' action, the OVS action
'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added.
This OVS action is intended to be used by OVN to check the packet length
and generate an ICMP packet with type 3, code 4 and next hop mtu
in the logical router pipeline if the MTU of the physical interface
is lesser than the packet length. More information can be found here [2]
[1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9
[2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
CC: Ben Pfaff <blp@ovn.org>
CC: Gregory Rose <gvrose8192@gmail.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
/* Highest valid public OVS_CHECK_PKT_LEN_ATTR_* value. */
#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
|
|
|
|
|
2019-04-23 00:53:43 +05:30
|
|
|
#ifdef __KERNEL__
|
|
|
|
struct check_pkt_len_arg {
|
|
|
|
u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. */
|
|
|
|
bool exec_for_greater; /* When true, actions in IF_GREATER will
|
|
|
|
* not change flow keys. False otherwise.
|
|
|
|
*/
|
|
|
|
bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL
|
|
|
|
* will not change flow keys. False
|
|
|
|
* otherwise.
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2024-07-13 23:23:38 +02:00
|
|
|
#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
|
|
|
|
/**
|
|
|
|
* enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE
|
|
|
|
* action.
|
|
|
|
*
|
|
|
|
* @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the
|
|
|
|
* sample.
|
|
|
|
* @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that
|
|
|
|
* contains user-defined metadata. The maximum length is
|
|
|
|
* OVS_PSAMPLE_COOKIE_MAX_SIZE bytes.
|
|
|
|
*
|
|
|
|
* Sends the packet to the psample multicast group with the specified group and
|
|
|
|
* cookie. It is possible to combine this action with the
|
|
|
|
* %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample.
|
|
|
|
*/
|
|
|
|
enum ovs_psample_attr {
|
|
|
|
OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number identifying the sample source. */
|
|
|
|
OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie of at most
                          * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. */
|
|
|
|
|
|
|
|
/* private: */
|
|
|
|
__OVS_PSAMPLE_ATTR_MAX /* Sentinel: one more than the last attribute. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Highest valid OVS_PSAMPLE_ATTR_* value. */
#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1)
|
|
|
|
|
2011-10-21 14:38:54 -07:00
|
|
|
/**
|
2011-10-25 14:03:35 -07:00
|
|
|
* enum ovs_action_attr - Action types.
|
2011-10-21 14:38:54 -07:00
|
|
|
*
|
2011-10-25 14:03:35 -07:00
|
|
|
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
|
2016-06-24 07:42:29 -07:00
|
|
|
* @OVS_ACTION_ATTR_TRUNC: Output packet to port with truncated packet size.
|
2011-10-25 14:03:35 -07:00
|
|
|
* @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
|
|
|
|
* %OVS_USERSPACE_ATTR_* attributes.
|
2017-02-13 10:39:13 +08:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
|
|
|
|
* onto the packet.
|
|
|
|
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
|
|
|
|
* from the packet.
|
2011-10-25 14:03:35 -07:00
|
|
|
* @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
|
|
|
|
* the nested %OVS_SAMPLE_ATTR_* attributes.
|
2013-01-25 16:22:07 +09:00
|
|
|
* @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
|
|
|
|
* single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
|
|
|
|
* value.
|
2014-09-05 15:44:19 -07:00
|
|
|
* @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A
|
|
|
|
* nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
|
|
|
|
* and a mask. For every bit set in the mask, the corresponding bit value
|
|
|
|
* is copied from the value to the packet header field, rest of the bits are
|
|
|
|
* left unchanged. The non-masked value bits must be passed in as zeroes.
|
|
|
|
* Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
|
2017-04-12 17:53:34 -07:00
|
|
|
* @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
|
|
|
|
* @OVS_ACTION_ATTR_HASH: Compute and set flow hash value.
|
2013-01-25 16:22:07 +09:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
|
2014-02-04 12:39:37 -08:00
|
|
|
* top of the packet's MPLS label stack. Set the ethertype of the
|
2013-01-25 16:22:07 +09:00
|
|
|
* encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
|
|
|
|
* indicate the new packet contents.
|
|
|
|
* @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
|
|
|
|
* packet's MPLS label stack. Set the encapsulating frame's ethertype to
|
2014-02-04 12:39:37 -08:00
|
|
|
* indicate the new packet contents. This could potentially still be
|
|
|
|
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
|
2013-01-25 16:22:07 +09:00
|
|
|
* is no MPLS label stack, as determined by ethertype, no action is taken.
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
* @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
|
|
|
|
* entries in the flow key.
|
2017-02-06 21:04:41 +08:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
|
|
|
|
* packet.
|
2017-05-06 15:49:43 +00:00
|
|
|
* @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the packet.
|
2018-01-19 14:21:51 -05:00
|
|
|
* @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet.
|
2018-01-06 13:47:51 +08:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
|
|
|
|
* @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
|
2011-10-21 14:38:54 -07:00
|
|
|
*
|
2011-11-14 15:56:43 -08:00
|
|
|
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
|
|
|
|
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
|
|
|
|
* type may not be changed.
|
2014-11-11 11:53:47 -08:00
|
|
|
*
|
2015-05-22 11:22:40 -07:00
|
|
|
* @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated
|
|
|
|
* from the @OVS_ACTION_ATTR_SET.
|
2014-11-11 11:53:47 -08:00
|
|
|
* @OVS_ACTION_ATTR_TUNNEL_PUSH: Push tunnel header described by struct
|
|
|
|
* ovs_action_push_tnl.
|
|
|
|
* @OVS_ACTION_ATTR_TUNNEL_POP: Lookup tunnel port by port-no passed and pop
|
|
|
|
* tunnel header.
|
2017-02-23 11:27:54 -08:00
|
|
|
* @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
|
|
|
|
* packet, or modify the packet (e.g., change the DSCP field).
|
2018-12-14 14:32:23 -08:00
|
|
|
* @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
|
|
|
|
* actions without affecting the original packet and key.
|
Add a new OVS action check_pkt_larger
This patch adds a new action 'check_pkt_larger' which checks if the
packet is larger than the given size and stores the result in the
destination register.
Usage: check_pkt_larger(len)->REGISTER
Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next;
This patch makes use of the new datapath action - 'check_pkt_len'
which was recently added in the commit [1].
At the start of ovs-vswitchd, datapath is probed for this action.
If the datapath action is present, then 'check_pkt_larger'
makes use of this datapath action.
Datapath action 'check_pkt_len' takes these nlattrs
* OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions
to apply if the packet length is greater than the specified 'pkt_len'
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested
actions to apply if the packet length is lesser or equal to the
specified 'pkt_len'.
Let's say we have these flows added to an OVS bridge br-int
table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1)
table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3
table=1, priority=100,in_port=1,ip,actions=output:4
Then the action 'check_pkt_larger' will be translated as
- check_pkt_len(size=100,gt(3),le(4))
datapath will check the packet length and if the packet length is greater than 100,
it will output to port 3, else it will output to port 4.
In case, datapath doesn't support 'check_pkt_len' action, the OVS action
'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added.
This OVS action is intended to be used by OVN to check the packet length
and generate an ICMP packet with type 3, code 4 and next hop mtu
in the logical router pipeline if the MTU of the physical interface
is lesser than the packet length. More information can be found here [2]
[1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9
[2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
CC: Ben Pfaff <blp@ovn.org>
CC: Gregory Rose <gvrose8192@gmail.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
|
|
|
* @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
|
|
|
|
* of actions if greater than the specified packet length, else execute
|
|
|
|
* another set of actions.
|
2021-11-29 11:52:05 +05:30
|
|
|
* @OVS_ACTION_ATTR_ADD_MPLS: Push a new MPLS label stack entry at the
|
|
|
|
* start of the packet or at the start of the l3 header depending on the value
|
|
|
|
* of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
|
|
|
|
* argument.
|
2019-12-18 05:48:12 +01:00
|
|
|
* @OVS_ACTION_ATTR_DROP: Explicit drop action.
|
2024-07-13 23:23:38 +02:00
|
|
|
* @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers
|
|
|
|
* via psample.
|
2011-10-21 14:38:54 -07:00
|
|
|
*/
|
|
|
|
|
2011-10-05 09:59:51 -07:00
|
|
|
enum ovs_action_attr {
|
2011-08-18 10:35:40 -07:00
|
|
|
OVS_ACTION_ATTR_UNSPEC,
|
2011-10-25 14:03:35 -07:00
|
|
|
OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
|
2011-10-12 16:24:54 -07:00
|
|
|
OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
|
2011-10-25 14:03:35 -07:00
|
|
|
OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
|
2011-11-14 15:56:43 -08:00
|
|
|
OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
|
|
|
|
OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
|
2011-10-12 16:24:54 -07:00
|
|
|
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
|
2014-11-08 07:24:42 -08:00
|
|
|
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
|
2014-04-08 18:42:39 -07:00
|
|
|
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
|
2014-07-24 02:17:48 -07:00
|
|
|
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
|
|
|
|
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
|
2014-09-05 15:44:19 -07:00
|
|
|
OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including
|
|
|
|
* data immediately followed by a mask.
|
|
|
|
* The data must be zero for the unmasked
|
|
|
|
* bits. */
|
Add support for connection tracking.
This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.
Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.
Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.
The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:
- "commit": When commit is executed, the connection moves from
uncommitted state to committed state. This signals that information
about the connection should be stored beyond the lifetime of the
packet within the pipeline. This allows future packets in the same
connection to be recognized as part of the same "established" (est)
connection, as well as identifying packets in the reply (rpl)
direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
Each zone is an independent connection tracking context. When the
"commit" parameter is used, the connection will only be committed in
the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
of the packet will continue processing the current actions list as an
untracked packet. An additional instance of the packet will be sent to
the connection tracker, which will be re-injected into the OpenFlow
pipeline to resume processing in the specified table, with the
ct_state and other ct match fields set. If the table is not specified,
then the packet is submitted to the connection tracker, but the
pipeline does not fork and the ct match fields are not populated. It
is strongly recommended to specify a table later than the current
table to prevent loops.
When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:
- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.
For more information, consult the ovs-ofctl(8) man pages.
Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:
table=0,priority=1,action=drop
table=0,arp,action=normal
table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
table=1,in_port=2,ct_state=+trk+est,tcp,action=1
table=1,in_port=2,ct_state=+trk+new,tcp,action=drop
Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
|
|
|
OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */
|
2016-06-24 07:42:29 -07:00
|
|
|
OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */
|
2017-02-06 21:04:41 +08:00
|
|
|
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
|
|
|
|
OVS_ACTION_ATTR_POP_ETH, /* No argument. */
|
2018-01-19 14:21:51 -05:00
|
|
|
OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */
|
2018-01-31 21:53:06 +08:00
|
|
|
OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */
|
|
|
|
OVS_ACTION_ATTR_POP_NSH, /* No argument. */
|
2018-12-14 14:32:23 -08:00
|
|
|
OVS_ACTION_ATTR_METER, /* u32 meter number. */
|
|
|
|
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
|
Add a new OVS action check_pkt_larger
This patch adds a new action 'check_pkt_larger' which checks if the
packet is larger than the given size and stores the result in the
destination register.
Usage: check_pkt_larger(len)->REGISTER
Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next;
This patch makes use of the new datapath action - 'check_pkt_len'
which was recently added in the commit [1].
At the start of ovs-vswitchd, datapath is probed for this action.
If the datapath action is present, then 'check_pkt_larger'
makes use of this datapath action.
Datapath action 'check_pkt_len' takes these nlattrs
* OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions
to apply if the packet length is greater than the specified 'pkt_len'
* OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested
actions to apply if the packet length is lesser or equal to the
specified 'pkt_len'.
Let's say we have these flows added to an OVS bridge br-int
table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1)
table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3
table=1, priority=100,in_port=1,ip,actions=output:4
Then the action 'check_pkt_larger' will be translated as
- check_pkt_len(size=100,gt(3),le(4))
datapath will check the packet length and if the packet length is greater than 100,
it will output to port 3, else it will output to port 4.
In case, datapath doesn't support 'check_pkt_len' action, the OVS action
'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added.
This OVS action is intended to be used by OVN to check the packet length
and generate an ICMP packet with type 3, code 4 and next hop mtu
in the logical router pipeline if the MTU of the physical interface
is lesser than the packet length. More information can be found here [2]
[1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9
[2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
Suggested-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
CC: Ben Pfaff <blp@ovn.org>
CC: Gregory Rose <gvrose8192@gmail.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
|
|
|
OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
|
2021-11-29 11:52:05 +05:30
|
|
|
OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
|
2024-04-03 10:35:28 -04:00
|
|
|
OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
|
2024-04-03 10:35:31 -04:00
|
|
|
OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */
|
2024-07-13 23:23:38 +02:00
|
|
|
OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */
|
2014-11-08 07:24:42 -08:00
|
|
|
|
2014-11-11 11:53:47 -08:00
|
|
|
#ifndef __KERNEL__
|
|
|
|
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
|
|
|
|
OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */
|
userspace: Avoid dp_hash recirculation for balance-tcp bond mode.
Problem:
In OVS, flows with output over a bond interface of type “balance-tcp”
get translated by the ofproto layer into "HASH" and "RECIRC" datapath
actions. After recirculation, the packet is forwarded to the bond
member port based on 8-bits of the datapath hash value computed through
dp_hash. This causes performance degradation in the following ways:
1. The recirculation of the packet implies another lookup of the
packet’s flow key in the exact match cache (EMC) and potentially
Megaflow classifier (DPCLS). This is the biggest cost factor.
2. The recirculated packets have a new “RSS” hash and compete with the
original packets for the scarce number of EMC slots. This implies more
EMC misses and potentially EMC thrashing causing costly DPCLS lookups.
3. The 256 extra megaflow entries per bond for dp_hash bond selection
put additional load on the revalidation threads.
Owing to this performance degradation, deployments stick to “balance-slb”
bond mode even though it does not do active-active load balancing for
VXLAN- and GRE-tunnelled traffic because all tunnel packets have the
same source MAC address.
Proposed optimization:
This proposal introduces a new load-balancing output action instead of
recirculation.
Maintain one table per-bond (could just be an array of uint16's) and
program it the same way internal flows are created today for each
possible hash value (256 entries) from ofproto layer. Use this table to
load-balance flows as part of output action processing.
Currently xlate_normal() -> output_normal() ->
bond_update_post_recirc_rules() -> bond_may_recirc() and
compose_output_action__() generate 'dp_hash(hash_l4(0))' and
'recirc(<RecircID>)' actions. In this case the RecircID identifies the
bond. For the recirculated packets the ofproto layer installs megaflow
entries that match on RecircID and masked dp_hash and send them to the
corresponding output port.
Instead, we will now generate action as
'lb_output(<bond id>)'
This combines hash computation (only if needed, else re-use RSS hash)
and inline load-balancing over the bond. This action is used *only* for
balance-tcp bonds in userspace datapath (the OVS kernel datapath
remains unchanged).
Example:
Current scheme:
With 8 UDP flows (with random UDP src port):
flow-dump from pmd on cpu core: 2
recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1)
recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2
recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1
recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1
recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2
recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2
recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1
recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1
New scheme:
We can do with a single flow entry (for any number of new flows):
in_port(7),<...> actions:lb_output(1)
A new CLI has been added to dump datapath bond cache as given below.
# ovs-appctl dpif-netdev/bond-show [dp]
Bond cache:
bond-id 1 :
bucket 0 - slave 2
bucket 1 - slave 1
bucket 2 - slave 2
bucket 3 - slave 1
Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com>
Tested-by: Matteo Croce <mcroce@redhat.com>
Tested-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-05-22 10:50:05 +02:00
|
|
|
OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */
|
2014-11-11 11:53:47 -08:00
|
|
|
#endif
|
2015-05-22 11:22:40 -07:00
|
|
|
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
|
|
|
|
* from userspace. */
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked
|
|
|
|
* set action converted from
|
|
|
|
* OVS_ACTION_ATTR_SET. */
|
|
|
|
#endif
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
|
|
|
|
2011-08-18 10:35:40 -07:00
|
|
|
/* One less than the __OVS_ACTION_ATTR_MAX marker, i.e. the value of the last
 * action attribute declared before that marker. */
#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2018-02-07 07:49:53 -08:00
|
|
|
/* Meters: name strings and version number for the meter interface.
 * NOTE(review): FAMILY/MCGROUP look like Generic Netlink family and
 * multicast group names -- confirm. */
|
|
|
|
#define OVS_METER_FAMILY "ovs_meter"
|
|
|
|
#define OVS_METER_MCGROUP "ovs_meter"
|
|
|
|
#define OVS_METER_VERSION 0x1
|
|
|
|
|
|
|
|
/* Meter commands; the comment beside each enumerator states what it
 * requests. */
enum ovs_meter_cmd {
|
|
|
|
OVS_METER_CMD_UNSPEC, /* Value 0; presumably "no command" -- confirm. */
|
|
|
|
OVS_METER_CMD_FEATURES, /* Get features supported by the datapath. */
|
|
|
|
OVS_METER_CMD_SET, /* Add or modify a meter. */
|
|
|
|
OVS_METER_CMD_DEL, /* Delete a meter. */
|
|
|
|
OVS_METER_CMD_GET /* Get meter stats. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Meter attributes; the comment beside each enumerator gives the type of
 * its argument. */
enum ovs_meter_attr {
|
|
|
|
OVS_METER_ATTR_UNSPEC, /* Value 0; no argument type documented. */
|
|
|
|
OVS_METER_ATTR_ID, /* u32 meter ID within datapath. */
|
|
|
|
OVS_METER_ATTR_KBPS, /* No argument. If set, units in kilobits
|
|
|
|
* per second. Otherwise, units in
|
|
|
|
* packets per second.
|
|
|
|
*/
|
|
|
|
OVS_METER_ATTR_STATS, /* struct ovs_flow_stats for the meter. */
|
|
|
|
OVS_METER_ATTR_BANDS, /* Nested attributes for meter bands. */
|
|
|
|
OVS_METER_ATTR_USED, /* u64 msecs last used in monotonic time. */
|
|
|
|
OVS_METER_ATTR_CLEAR, /* Flag to clear stats, used (presumably the
                       * OVS_METER_ATTR_USED timestamp -- confirm). */
|
|
|
|
OVS_METER_ATTR_MAX_METERS, /* u32 number of meters supported. */
|
|
|
|
OVS_METER_ATTR_MAX_BANDS, /* u32 max number of bands per meter. */
|
|
|
|
OVS_METER_ATTR_PAD, /* No description given; presumably alignment
                     * padding -- confirm. */
|
|
|
|
__OVS_METER_ATTR_MAX /* End-of-list marker used to derive
                      * OVS_METER_ATTR_MAX. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Value of the last attribute listed above (OVS_METER_ATTR_PAD). */
#define OVS_METER_ATTR_MAX (__OVS_METER_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Meter band attributes; the comment beside each enumerator gives the type
 * of its argument. */
enum ovs_band_attr {
|
|
|
|
OVS_BAND_ATTR_UNSPEC, /* Value 0; no argument type documented. */
|
|
|
|
OVS_BAND_ATTR_TYPE, /* u32 OVS_METER_BAND_TYPE_* constant. */
|
|
|
|
OVS_BAND_ATTR_RATE, /* u32 band rate in meter units (see
                     * OVS_METER_ATTR_KBPS). */
|
|
|
|
OVS_BAND_ATTR_BURST, /* u32 burst size in meter units. */
|
|
|
|
OVS_BAND_ATTR_STATS, /* struct ovs_flow_stats for the band. */
|
|
|
|
__OVS_BAND_ATTR_MAX /* End-of-list marker used to derive
                     * OVS_BAND_ATTR_MAX. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Value of the last attribute listed above (OVS_BAND_ATTR_STATS). */
#define OVS_BAND_ATTR_MAX (__OVS_BAND_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Meter band types: the constants that OVS_BAND_ATTR_TYPE carries. */
enum ovs_meter_band_type {
|
|
|
|
OVS_METER_BAND_TYPE_UNSPEC, /* Value 0; no behavior documented. */
|
|
|
|
OVS_METER_BAND_TYPE_DROP, /* Drop exceeding packets. */
|
|
|
|
__OVS_METER_BAND_TYPE_MAX /* End-of-list marker used to derive
                           * OVS_METER_BAND_TYPE_MAX. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Value of the last band type listed above (OVS_METER_BAND_TYPE_DROP). */
#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1)
|
|
|
|
|
2018-08-17 02:05:05 -07:00
|
|
|
/* Conntrack limit: name strings and version number for the conntrack limit
 * interface.
 * NOTE(review): FAMILY/MCGROUP look like Generic Netlink family and
 * multicast group names -- confirm. */
|
|
|
|
#define OVS_CT_LIMIT_FAMILY "ovs_ct_limit"
|
|
|
|
#define OVS_CT_LIMIT_MCGROUP "ovs_ct_limit"
|
|
|
|
#define OVS_CT_LIMIT_VERSION 0x1
|
|
|
|
|
|
|
|
/* Conntrack limit commands; the comment beside each enumerator states what
 * it requests. */
enum ovs_ct_limit_cmd {
|
|
|
|
OVS_CT_LIMIT_CMD_UNSPEC, /* Value 0; presumably "no command" -- confirm. */
|
|
|
|
OVS_CT_LIMIT_CMD_SET, /* Add or modify ct limit. */
|
|
|
|
OVS_CT_LIMIT_CMD_DEL, /* Delete ct limit. */
|
|
|
|
OVS_CT_LIMIT_CMD_GET /* Get ct limit. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Conntrack limit attributes. */
enum ovs_ct_limit_attr {
|
|
|
|
OVS_CT_LIMIT_ATTR_UNSPEC, /* Value 0; no argument type documented. */
|
|
|
|
OVS_CT_LIMIT_ATTR_ZONE_LIMIT, /* Nested struct ovs_zone_limit. */
|
|
|
|
__OVS_CT_LIMIT_ATTR_MAX /* End-of-list marker used to derive
                         * OVS_CT_LIMIT_ATTR_MAX. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Value of the last attribute listed above (OVS_CT_LIMIT_ATTR_ZONE_LIMIT). */
#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Zone id constant with value -1.  The replacement list is parenthesized so
 * the expansion is always a single operand, whatever expression it lands in.
 * NOTE(review): presumably the ovs_zone_limit.zone_id value that selects the
 * default limit -- confirm against users. */
#define OVS_ZONE_LIMIT_DEFAULT_ZONE (-1)
|
|
|
|
|
|
|
|
/* Payload nested in OVS_CT_LIMIT_ATTR_ZONE_LIMIT. */
struct ovs_zone_limit {
|
|
|
|
int zone_id; /* NOTE(review): presumably a conntrack zone id, or
              * OVS_ZONE_LIMIT_DEFAULT_ZONE -- confirm. */
|
|
|
|
__u32 limit; /* NOTE(review): presumably the connection limit for the
              * zone -- confirm. */
|
|
|
|
__u32 count; /* NOTE(review): presumably the current connection count
              * for the zone -- confirm. */
|
|
|
|
};
|
|
|
|
|
2024-04-03 10:35:28 -04:00
|
|
|
/* Attributes nested inside OVS_ACTION_ATTR_DEC_TTL. */
enum ovs_dec_ttl_attr {
|
|
|
|
OVS_DEC_TTL_ATTR_UNSPEC, /* Value 0; no argument type documented. */
|
|
|
|
OVS_DEC_TTL_ATTR_ACTION, /* Nested struct nlattr */
|
|
|
|
__OVS_DEC_TTL_ATTR_MAX /* End-of-list marker used to derive
                        * OVS_DEC_TTL_ATTR_MAX. */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Value of the last attribute listed above (OVS_DEC_TTL_ATTR_ACTION). */
#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
|
|
|
|
|
2018-12-14 14:32:23 -08:00
|
|
|
#define OVS_CLONE_ATTR_EXEC 0 /* Specify a u32 value. When nonzero,
|
|
|
|
* actions in clone will not change flow
|
|
|
|
* keys. False otherwise.
|
|
|
|
*/
|
|
|
|
|
2011-10-05 10:50:58 -07:00
|
|
|
#endif /* _LINUX_OPENVSWITCH_H */
|