2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00
ovs/include/linux/openvswitch.h

1192 lines
44 KiB
C
Raw Normal View History

/*
* Copyright (c) 2007-2017 Nicira, Inc.
*
* This file is offered under your choice of two licenses: Apache 2.0 or GNU
* GPL 2.0 or later. The permission statements for each of these licenses is
* given below. You may license your modifications to this file under either
* of these licenses or both. If you wish to license your modifications under
* only one of these licenses, delete the permission text for the other
* license.
*
* ----------------------------------------------------------------------
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ----------------------------------------------------------------------
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
* ----------------------------------------------------------------------
*/
#ifndef _LINUX_OPENVSWITCH_H
#define _LINUX_OPENVSWITCH_H 1
#include <linux/types.h>
#include <linux/if_ether.h>
/**
* struct ovs_header - header for OVS Generic Netlink messages.
* @dp_ifindex: ifindex of local port for datapath (0 to make a request not
* specific to a datapath).
*
* Attributes following the header are specific to a particular OVS Generic
* Netlink family, but all of the OVS families use this header.
*/
struct ovs_header {
/* ifindex of the datapath's local port; 0 makes the request not specific
 * to any one datapath. */
int dp_ifindex;
};
/* Datapaths. */
#define OVS_DATAPATH_FAMILY "ovs_datapath"
#define OVS_DATAPATH_MCGROUP "ovs_datapath"

/*
 * Datapath API version history:
 *   2 - API users are expected to provide OVS_DP_ATTR_USER_FEATURES when
 *       creating the datapath.
 */
#define OVS_DATAPATH_VERSION 2

/* Earliest OVS datapath version that supports features. */
#define OVS_DP_VER_FEATURES 2

/* Commands of the datapath family; numbered consecutively from zero. */
enum ovs_datapath_cmd {
	OVS_DP_CMD_UNSPEC = 0,
	OVS_DP_CMD_NEW    = 1,
	OVS_DP_CMD_DEL    = 2,
	OVS_DP_CMD_GET    = 3,
	OVS_DP_CMD_SET    = 4
};
/**
* enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
* @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
* port". This is the name of the network device whose dp_ifindex is given in
* the &struct ovs_header. Always present in notifications. Required in
* %OVS_DP_CMD_NEW requests. May be used as an alternative to specifying
* dp_ifindex in other requests (with a dp_ifindex of 0).
* @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
* set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
* %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
* not be sent.
* @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
* OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
* datapath. Always present in notifications.
* @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
* datapath. Always present in notifications.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_DP_* commands.
*/
/*
 * Attribute types for OVS_DP_* commands.  Enumerators take consecutive
 * values starting at zero; __OVS_DP_ATTR_MAX is a sentinel that must stay
 * last because OVS_DP_ATTR_MAX is derived from it.
 */
enum ovs_datapath_attr {
	OVS_DP_ATTR_UNSPEC,
	OVS_DP_ATTR_NAME,		/* name of dp_ifindex netdev */
	OVS_DP_ATTR_UPCALL_PID,		/* Netlink PID to receive upcalls */
	OVS_DP_ATTR_STATS,		/* struct ovs_dp_stats */
	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */
	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_* */
	OVS_DP_ATTR_PAD,
	OVS_DP_ATTR_MASKS_CACHE_SIZE,
	OVS_DP_ATTR_PER_CPU_PIDS,	/* Netlink PIDS to receive upcalls */
	__OVS_DP_ATTR_MAX
};

/* Highest valid datapath attribute type. */
#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
/* All 64-bit integers within Netlink messages are 4-byte aligned only. */
/* Datapath-wide counters; this is the payload type of OVS_DP_ATTR_STATS. */
struct ovs_dp_stats {
__u64 n_hit; /* Number of flow table matches. */
__u64 n_missed; /* Number of flow table misses. */
__u64 n_lost; /* Number of misses not sent to userspace. */
__u64 n_flows; /* Number of flows present. */
};
/* Mega flow mask usage counters; this is the payload type of
 * OVS_DP_ATTR_MEGAFLOW_STATS. */
struct ovs_dp_megaflow_stats {
__u64 n_mask_hit; /* Number of masks used for flow lookups. */
__u32 n_masks; /* Number of masks for the datapath. */
__u32 pad0; /* Pad for future expansion. */
__u64 n_cache_hit; /* Number of cache matches for flow lookups. */
__u64 pad1; /* Pad for future expansion. */
};
/* Counters for packets sent or received through a vport; this is the payload
 * type of OVS_VPORT_ATTR_STATS. */
struct ovs_vport_stats {
__u64 rx_packets; /* total packets received */
__u64 tx_packets; /* total packets transmitted */
__u64 rx_bytes; /* total bytes received */
__u64 tx_bytes; /* total bytes transmitted */
__u64 rx_errors; /* bad packets received */
__u64 tx_errors; /* packet transmit problems */
__u64 rx_dropped; /* no space in linux buffers */
__u64 tx_dropped; /* no space available in linux */
};
/*
 * Datapath feature flags (OVS_DP_F_*), the values carried by
 * OVS_DP_ATTR_USER_FEATURES.  Each flag occupies its own bit.
 */

/* Allow last Netlink attribute to be unaligned */
#define OVS_DP_F_UNALIGNED (1 << 0)

/* Allow datapath to associate multiple Netlink PIDs to each vport */
#define OVS_DP_F_VPORT_PIDS (1 << 1)

/* Allow tc offload recirc sharing */
#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)

/* Allow per-cpu dispatch of upcalls */
#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)

/* Fixed logical ports. */
#define OVSP_LOCAL ((__u32)0)
/* Packet transfer. */
#define OVS_PACKET_FAMILY "ovs_packet"
#define OVS_PACKET_VERSION 0x1

/*
 * Commands of the packet family.  MISS and ACTION are notifications sent
 * from the kernel to userspace; EXECUTE is a command issued by userspace.
 */
enum ovs_packet_cmd {
	OVS_PACKET_CMD_UNSPEC  = 0,
	OVS_PACKET_CMD_MISS    = 1,	/* Notification: flow table miss. */
	OVS_PACKET_CMD_ACTION  = 2,	/* Notification: OVS_ACTION_ATTR_USERSPACE
					 * action. */
	OVS_PACKET_CMD_EXECUTE = 3	/* Command: apply actions to a packet. */
};
/**
* enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
* @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
* packet as received, from the start of the Ethernet header onward. For
* %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
* actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
* the flow key extracted from the packet as originally received.
* @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
* extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
* userspace to adapt its flow setup strategy by comparing its notion of the
* flow key against the kernel's. When used with %OVS_PACKET_CMD_EXECUTE, only
* metadata key fields (e.g. priority, skb mark) are honored. All the packet
* header fields are parsed from the packet instead.
* @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
* for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
* Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional
* %OVS_USERSPACE_ATTR_ACTIONS attribute.
* @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
* notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
* %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
* specified there.
* @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION
* notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
* %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
* output port is actually a tunnel port. Contains the output tunnel key
* extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
* @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and
* %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment
* size.
* @OVS_PACKET_ATTR_LEN: Packet size before truncation.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_PACKET_* commands.
*/
/*
 * Attribute types for OVS_PACKET_* commands, with their numeric values
 * spelled out.  __OVS_PACKET_ATTR_MAX is the trailing sentinel from which
 * OVS_PACKET_ATTR_MAX is computed.
 */
enum ovs_packet_attr {
	OVS_PACKET_ATTR_UNSPEC         = 0,
	OVS_PACKET_ATTR_PACKET         = 1,	/* Packet data. */
	OVS_PACKET_ATTR_KEY            = 2,	/* Nested OVS_KEY_ATTR_* attributes. */
	OVS_PACKET_ATTR_ACTIONS        = 3,	/* Nested OVS_ACTION_ATTR_* attributes. */
	OVS_PACKET_ATTR_USERDATA       = 4,	/* OVS_ACTION_ATTR_USERSPACE arg. */
	OVS_PACKET_ATTR_EGRESS_TUN_KEY = 5,	/* Nested OVS_TUNNEL_KEY_ATTR_*
						 * attributes. */
	OVS_PACKET_ATTR_UNUSED1        = 6,
	OVS_PACKET_ATTR_UNUSED2        = 7,
	OVS_PACKET_ATTR_PROBE          = 8,	/* Packet operation is a feature probe,
						 * error logging should be suppressed. */
	OVS_PACKET_ATTR_MRU            = 9,	/* Maximum received IP fragment size. */
	OVS_PACKET_ATTR_LEN            = 10,	/* Packet size before truncation. */
	OVS_PACKET_ATTR_HASH           = 11,	/* Packet hash. */
	__OVS_PACKET_ATTR_MAX
};

/* Highest valid packet attribute type. */
#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
/* Virtual ports. */
#define OVS_VPORT_FAMILY "ovs_vport"
#define OVS_VPORT_MCGROUP "ovs_vport"
#define OVS_VPORT_VERSION 0x1

/* Commands of the vport family; numbered consecutively from zero. */
enum ovs_vport_cmd {
	OVS_VPORT_CMD_UNSPEC = 0,
	OVS_VPORT_CMD_NEW    = 1,
	OVS_VPORT_CMD_DEL    = 2,
	OVS_VPORT_CMD_GET    = 3,
	OVS_VPORT_CMD_SET    = 4
};
/*
 * Vport types, the values carried by OVS_VPORT_ATTR_TYPE.
 *
 * The first six enumerators are numbered consecutively from zero.  From LISP
 * onward each type carries an explicit number starting at 105, so the values
 * in between are unassigned.  __OVS_VPORT_TYPE_MAX is a sentinel that must
 * stay last because OVS_VPORT_TYPE_MAX is derived from it.
 */
enum ovs_vport_type {
	OVS_VPORT_TYPE_UNSPEC,
	OVS_VPORT_TYPE_NETDEV,		/* network device */
	OVS_VPORT_TYPE_INTERNAL,	/* network device implemented by datapath */
	OVS_VPORT_TYPE_GRE,		/* GRE tunnel. */
	OVS_VPORT_TYPE_VXLAN,		/* VXLAN tunnel. */
	OVS_VPORT_TYPE_GENEVE,		/* Geneve tunnel. */
	OVS_VPORT_TYPE_LISP = 105,	/* LISP tunnel */
	OVS_VPORT_TYPE_STT = 106,	/* STT tunnel */
	OVS_VPORT_TYPE_ERSPAN = 107,	/* ERSPAN tunnel. */
	OVS_VPORT_TYPE_IP6ERSPAN = 108,	/* IPv6 ERSPAN tunnel. */
	OVS_VPORT_TYPE_IP6GRE = 109,	/* IPv6 GRE tunnel. */
	OVS_VPORT_TYPE_GTPU = 110,	/* GTP-U tunnel. */
	OVS_VPORT_TYPE_BAREUDP = 111,	/* Bareudp tunnel. */
	OVS_VPORT_TYPE_SRV6 = 112,	/* SRv6 tunnel. */
	__OVS_VPORT_TYPE_MAX
};

/* Highest assigned vport type value. */
#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
/**
* enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
* @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
* @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
* of vport.
* @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
* this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
* plus a null terminator.
* @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
* @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
* among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
* received on this port. If this is a single-element array of value 0,
* upcalls should not be sent.
* @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
* packets sent or received through the vport.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_VPORT_* commands.
*
* For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
* %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
* optional; if not specified a free port number is automatically selected.
* Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
* of vport. %OVS_VPORT_ATTR_STATS is optional and other attributes are
* ignored.
*
* For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
* look up the vport to operate on; otherwise dp_ifindex from the &struct
* ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
*/
/*
 * Attribute types for OVS_VPORT_* commands, with their numeric values
 * spelled out.  __OVS_VPORT_ATTR_MAX is the trailing sentinel from which
 * OVS_VPORT_ATTR_MAX is computed.
 */
enum ovs_vport_attr {
	OVS_VPORT_ATTR_UNSPEC       = 0,
	OVS_VPORT_ATTR_PORT_NO      = 1,	/* u32 port number within datapath */
	OVS_VPORT_ATTR_TYPE         = 2,	/* u32 OVS_VPORT_TYPE_* constant. */
	OVS_VPORT_ATTR_NAME         = 3,	/* string name, up to IFNAMSIZ bytes long */
	OVS_VPORT_ATTR_OPTIONS      = 4,	/* nested attributes, varies by vport type */
	OVS_VPORT_ATTR_UPCALL_PID   = 5,	/* array of u32 Netlink socket PIDs for
						 * receiving upcalls */
	OVS_VPORT_ATTR_STATS        = 6,	/* struct ovs_vport_stats */
	OVS_VPORT_ATTR_PAD          = 7,
	OVS_VPORT_ATTR_IFINDEX      = 8,
	OVS_VPORT_ATTR_NETNSID      = 9,
	OVS_VPORT_ATTR_UPCALL_STATS = 10,
	__OVS_VPORT_ATTR_MAX
};

/* Highest valid vport attribute type. */
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
/**
* enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands
* @OVS_VPORT_UPCALL_ATTR_SUCCESS: 64-bit upcall success packets.
* @OVS_VPORT_UPCALL_ATTR_FAIL: 64-bit upcall fail packets.
*/
/* Upcall counter attribute types; numbering starts at zero with SUCCESS. */
enum ovs_vport_upcall_attr {
	OVS_VPORT_UPCALL_ATTR_SUCCESS = 0,	/* 64-bit upcall success packets. */
	OVS_VPORT_UPCALL_ATTR_FAIL    = 1,	/* 64-bit upcall fail packets. */
	__OVS_VPORT_UPCALL_ATTR_MAX,
};

/* Highest valid upcall attribute type. */
#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1)
/*
 * OVS_VXLAN_EXT_* values.  GPE is pinned at 8, which leaves values 2..7
 * free for new entries.
 */
enum {
	OVS_VXLAN_EXT_UNSPEC = 0,
	OVS_VXLAN_EXT_GBP    = 1,
	/* place new values here to fill gap. */
	OVS_VXLAN_EXT_GPE    = 8,
	__OVS_VXLAN_EXT_MAX,
};

/* Highest assigned OVS_VXLAN_EXT_* value. */
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
/*
 * OVS_BAREUDP_EXT_* values, numbered consecutively from zero.
 * __OVS_BAREUDP_EXT_MAX is a sentinel that must stay last because
 * OVS_BAREUDP_EXT_MAX is derived from it.
 */
enum {
	OVS_BAREUDP_EXT_UNSPEC,
	OVS_BAREUDP_EXT_MULTIPROTO_MODE,
	__OVS_BAREUDP_EXT_MAX,
};

/* Highest assigned OVS_BAREUDP_EXT_* value. */
#define OVS_BAREUDP_EXT_MAX (__OVS_BAREUDP_EXT_MAX - 1)
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/
enum {
	OVS_TUNNEL_ATTR_UNSPEC    = 0,
	OVS_TUNNEL_ATTR_DST_PORT  = 1,	/* 16-bit UDP port, used by L4 tunnels. */
	OVS_TUNNEL_ATTR_EXTENSION = 2,
	__OVS_TUNNEL_ATTR_MAX
};

/* Highest valid OVS_TUNNEL_ATTR_* value. */
#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)
/* Flows. */
#define OVS_FLOW_FAMILY "ovs_flow"
#define OVS_FLOW_MCGROUP "ovs_flow"
#define OVS_FLOW_VERSION 0x1

/* Commands of the flow family; numbered consecutively from zero. */
enum ovs_flow_cmd {
	OVS_FLOW_CMD_UNSPEC = 0,
	OVS_FLOW_CMD_NEW    = 1,
	OVS_FLOW_CMD_DEL    = 2,
	OVS_FLOW_CMD_GET    = 3,
	OVS_FLOW_CMD_SET    = 4
};
/* Match counters (packets and bytes).
 * NOTE(review): presumably reported per flow by the OVS_FLOW_* family; the
 * attribute that carries this struct is not visible here, so confirm. */
struct ovs_flow_stats {
__u64 n_packets; /* Number of matched packets. */
__u64 n_bytes; /* Number of matched bytes. */
};
enum ovs_key_attr {
OVS_KEY_ATTR_UNSPEC,
OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */
OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */
OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */
OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */
OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */
OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */
OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */
OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */
OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */
OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */
OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */
OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
is not computed by the datapath. */
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
OVS_KEY_ATTR_CT_STATE, /* u32 bitmask of OVS_CS_F_* */
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */
datapath: Add original direction conntrack tuple to sw_flow_key. Upstream commit: commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc Author: Jarno Rajahalme <jarno@ovn.org> Date: Thu Feb 9 11:21:59 2017 -0800 openvswitch: Add original direction conntrack tuple to sw_flow_key. Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections reply packets (as well as original direction packets), and both direction packets of any related connections may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. 
While existence of a conntrack entry is required for admission of the return or related packets, policy changes can render connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using the original direction 5-tuple matching the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same or different than that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> This patch squashes in minimal amount of OVS userspace code to not break the build. Later patches contain the full userspace support. 
Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-08 17:18:22 -08:00
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */
openvswitch.h: Align uAPI definition with the kernel. Upstream commit: commit 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Author: Ilya Maximets <i.maximets@ovn.org> Date: Wed Mar 9 23:20:33 2022 +0100 net: openvswitch: fix uAPI incompatibility with existing user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. 
[1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan <roid@nvidia.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Acked-by: Aaron Conole <aconole@redhat.com> Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> Not adding OVS_KEY_ATTR_IPV6_EXTHDRS in this commit as this is not necessary. Will be added along with the actual userspace implementation. This change should help avoiding incompatibility issues in the future. Acked-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2022-03-25 14:48:23 +01:00
/* User space decided to squat on types 29 and 30. They are defined
* below, but should not be sent to the kernel.
*
* WARNING: No new types should be added unless they are defined
* for both kernel and user space (no 'ifdef's). It's hard
* to keep compatibility otherwise.
*/
OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */
OVS_KEY_ATTR_ND_EXTENSIONS, /* struct ovs_key_nd_extensions */
openvswitch.h: Align uAPI definition with the kernel. Upstream commit: commit 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Author: Ilya Maximets <i.maximets@ovn.org> Date: Wed Mar 9 23:20:33 2022 +0100 net: openvswitch: fix uAPI incompatibility with existing user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. 
[1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan <roid@nvidia.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Acked-by: Aaron Conole <aconole@redhat.com> Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> Not adding OVS_KEY_ATTR_IPV6_EXTHDRS in this commit as this is not necessary. Will be added along with the actual userspace implementation. This change should help avoiding incompatibility issues in the future. Acked-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2022-03-25 14:48:23 +01:00
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info.
* For in-kernel use only.
*/
__OVS_KEY_ATTR_MAX
};
#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
/**
 * enum ovs_tunnel_key_attr - attribute types carrying tunnel metadata.
 *
 * Covers the tunnel ID, outer IPv4/IPv6 addresses, ToS/TTL, transport ports,
 * argument-less flag attributes (DF, CSUM, OAM) and per-encapsulation option
 * blobs.  Each enumerator's value is written out so the numbering is visible
 * at a glance; the values are the same ones implicit numbering would assign.
 *
 * %OVS_TUNNEL_KEY_ATTR_GTPU_OPTS exists only when __KERNEL__ is not defined,
 * so __OVS_TUNNEL_KEY_ATTR_MAX is deliberately left implicit: it follows
 * whichever enumerator is last in the current build.
 */
enum ovs_tunnel_key_attr {
	OVS_TUNNEL_KEY_ATTR_ID = 0,		/* be64 Tunnel ID */
	OVS_TUNNEL_KEY_ATTR_IPV4_SRC = 1,	/* be32 src IP address. */
	OVS_TUNNEL_KEY_ATTR_IPV4_DST = 2,	/* be32 dst IP address. */
	OVS_TUNNEL_KEY_ATTR_TOS = 3,		/* u8 Tunnel IP ToS. */
	OVS_TUNNEL_KEY_ATTR_TTL = 4,		/* u8 Tunnel IP TTL. */
	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT = 5,	/* No argument, set DF. */
	OVS_TUNNEL_KEY_ATTR_CSUM = 6,		/* No argument. CSUM packet. */
	OVS_TUNNEL_KEY_ATTR_OAM = 7,		/* No argument. OAM frame. */
	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS = 8,	/* Array of Geneve options. */
	OVS_TUNNEL_KEY_ATTR_TP_SRC = 9,		/* be16 src Transport Port. */
	OVS_TUNNEL_KEY_ATTR_TP_DST = 10,	/* be16 dst Transport Port. */
	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS = 11,	/* Nested OVS_VXLAN_EXT_* */
	OVS_TUNNEL_KEY_ATTR_IPV6_SRC = 12,	/* struct in6_addr src IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_IPV6_DST = 13,	/* struct in6_addr dst IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_PAD = 14,
	OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS = 15,	/* struct erspan_metadata */
#ifndef __KERNEL__
	/* Only used within userspace data path. */
	OVS_TUNNEL_KEY_ATTR_GTPU_OPTS = 16,	/* struct gtpu_metadata */
#endif
	__OVS_TUNNEL_KEY_ATTR_MAX
};

/* Highest valid attribute type in the current build. */
#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
userspace: Improved packet drop statistics. Currently OVS maintains explicit packet drop/error counters only on port level. Packets that are dropped as part of normal OpenFlow processing are counted in flow stats of “drop” flows or as table misses in table stats. These can only be interpreted by controllers that know the semantics of the configured OpenFlow pipeline. Without that knowledge, it is impossible for an OVS user to obtain e.g. the total number of packets dropped due to OpenFlow rules. Furthermore, there are numerous other reasons for which packets can be dropped by OVS slow path that are not related to the OpenFlow pipeline. The generated datapath flow entries include a drop action to avoid further expensive upcalls to the slow path, but subsequent packets dropped by the datapath are not accounted anywhere. Finally, the datapath itself drops packets in certain error situations. Also, these drops are today not accounted for. This makes it difficult for OVS users to monitor packet drop in an OVS instance and to alert a management system in case of an unexpected increase of such drops. Also OVS trouble-shooters face difficulties in analysing packet drops. With this patch we implement the following changes to address the issues mentioned above. 1. Identify and account all the silent packet drop scenarios 2. Display these drops in ovs-appctl coverage/show Co-authored-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Co-authored-by: Keshav Gupta <keshugupta1@gmail.com> Signed-off-by: Anju Thomas <anju.thomas@ericsson.com> Signed-off-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Signed-off-by: Keshav Gupta <keshugupta1@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Acked-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2019-12-18 05:48:12 +01:00
#ifndef __KERNEL__
/**
 * enum xlate_error - Different types of error during translation
 *
 * Only declared when __KERNEL__ is not defined.  %XLATE_OK is zero, so any
 * non-zero value denotes an error.  %XLATE_MAX is left implicit so that it
 * always equals the number of codes listed before it.
 */
enum xlate_error {
	XLATE_OK                       = 0,
	XLATE_BRIDGE_NOT_FOUND         = 1,
	XLATE_RECURSION_TOO_DEEP       = 2,
	XLATE_TOO_MANY_RESUBMITS       = 3,
	XLATE_STACK_TOO_DEEP           = 4,
	XLATE_NO_RECIRCULATION_CONTEXT = 5,
	XLATE_RECIRCULATION_CONFLICT   = 6,
	XLATE_TOO_MANY_MPLS_LABELS     = 7,
	XLATE_INVALID_TUNNEL_METADATA  = 8,
	XLATE_UNSUPPORTED_PACKET_TYPE  = 9,
	XLATE_CONGESTION_DROP          = 10,
	XLATE_FORWARDING_DISABLED      = 11,
	XLATE_MAX,
};
#endif
Implement new fragment handling policy. Until now, OVS has handled IP fragments more awkwardly than necessary. It has not been possible to match on L4 headers, even in fragments with offset 0 where they are actually present. This means that there was no way to implement ACLs that treat, say, different TCP ports differently, on fragmented traffic; instead, all decisions for fragment forwarding had to be made on the basis of L2 and L3 headers alone. This commit improves the situation significantly. It is still not possible to match on L4 headers in fragments with nonzero offset, because that information is simply not present in such fragments, but this commit adds the ability to match on L4 headers for fragments with zero offset. This means that it becomes possible to implement ACLs that drop such "first fragments" on the basis of L4 headers. In practice, that effectively blocks even fragmented traffic on an L4 basis, because the receiving IP stack cannot reassemble a full packet when the first fragment is missing. This commit works by adding a new "fragment type" to the kernel flow match and making it available through OpenFlow as a new NXM field named NXM_NX_IP_FRAG. Because OpenFlow 1.0 explicitly says that the L4 fields are always 0 for IP fragments, it adds a new OpenFlow fragment handling mode that fills in the L4 fields for "first fragments". It also enhances ovs-ofctl to allow users to configure this new fragment handling mode and to parse the new field. Signed-off-by: Ben Pfaff <blp@nicira.com> Bug #7557.
2011-10-19 21:33:44 -07:00
/**
 * enum ovs_frag_type - IPv4 and IPv6 fragment type
 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
 *
 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as the @ipv6_frag in
 * &struct ovs_key_ipv6.
 */
enum ovs_frag_type {
	OVS_FRAG_TYPE_NONE  = 0,
	OVS_FRAG_TYPE_FIRST = 1,
	OVS_FRAG_TYPE_LATER = 2,
	__OVS_FRAG_TYPE_MAX
};

/* Largest valid fragment type value. */
#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
/**
 * struct ovs_key_ethernet - Ethernet address pair of a flow key.
 * @eth_src: Source address, %ETH_ALEN octets.
 * @eth_dst: Destination address, %ETH_ALEN octets.
 *
 * Both members are plain byte arrays, so the structure has no alignment
 * requirement beyond one byte and contains no padding.
 */
struct ovs_key_ethernet {
__u8 eth_src[ETH_ALEN];
__u8 eth_dst[ETH_ALEN];
};
/**
 * struct ovs_key_mpls - MPLS part of a flow key.
 * @mpls_lse: One 32-bit big-endian value; presumably a complete MPLS label
 *	stack entry (the name suggests "LSE") -- confirm against the users of
 *	this structure.
 */
struct ovs_key_mpls {
__be32 mpls_lse;
};
/**
 * struct ovs_key_ipv4 - IPv4 part of a flow key.
 * @ipv4_src: Source address, big-endian.
 * @ipv4_dst: Destination address, big-endian.
 * @ipv4_proto: One octet; presumably the IP protocol number -- confirm
 *	against the flow extraction code.
 * @ipv4_tos: One octet; presumably the ToS byte of the header.
 * @ipv4_ttl: One octet; presumably the TTL of the header.
 * @ipv4_frag: One of OVS_FRAG_TYPE_*.
 *
 * Two 32-bit members are followed by four single-octet members, so the
 * structure is 12 bytes long with no padding.
 */
struct ovs_key_ipv4 {
	__be32 ipv4_src;
	__be32 ipv4_dst;
	__u8   ipv4_proto;
	__u8   ipv4_tos;
	__u8   ipv4_ttl;
	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
};
/**
 * struct ovs_key_ipv6 - IPv6 part of a flow key.
 * @ipv6_src: Source address as four big-endian 32-bit words.
 * @ipv6_dst: Destination address as four big-endian 32-bit words.
 * @ipv6_label: Flow label; 20-bits in least-significant bits.
 * @ipv6_proto: One octet; presumably the next-header / protocol number --
 *	confirm against the flow extraction code.
 * @ipv6_tclass: One octet; presumably the traffic class.
 * @ipv6_hlimit: One octet; presumably the hop limit.
 * @ipv6_frag: One of OVS_FRAG_TYPE_*.
 *
 * Nine 32-bit words are followed by four single-octet members, so the
 * structure is 40 bytes long with no padding.
 */
struct ovs_key_ipv6 {
	__be32 ipv6_src[4];
	__be32 ipv6_dst[4];
	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
	__u8   ipv6_proto;
	__u8   ipv6_tclass;
	__u8   ipv6_hlimit;
	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
};
/**
 * struct ovs_key_tcp - TCP part of a flow key.
 * @tcp_src: Source port, 16-bit big-endian.
 * @tcp_dst: Destination port, 16-bit big-endian.
 */
struct ovs_key_tcp {
__be16 tcp_src;
__be16 tcp_dst;
};
/**
 * struct ovs_key_udp - UDP part of a flow key.
 * @udp_src: Source port, 16-bit big-endian.
 * @udp_dst: Destination port, 16-bit big-endian.
 */
struct ovs_key_udp {
__be16 udp_src;
__be16 udp_dst;
};
/**
 * struct ovs_key_sctp - SCTP part of a flow key.
 * @sctp_src: Source port, 16-bit big-endian.
 * @sctp_dst: Destination port, 16-bit big-endian.
 */
struct ovs_key_sctp {
__be16 sctp_src;
__be16 sctp_dst;
};
/**
 * struct ovs_key_icmp - ICMP part of a flow key.
 * @icmp_type: ICMP type, one octet.
 * @icmp_code: ICMP code, one octet.
 */
struct ovs_key_icmp {
__u8 icmp_type;
__u8 icmp_code;
};
/**
 * struct ovs_key_icmpv6 - ICMPv6 part of a flow key.
 * @icmpv6_type: ICMPv6 type, one octet.
 * @icmpv6_code: ICMPv6 code, one octet.
 */
struct ovs_key_icmpv6 {
__u8 icmpv6_type;
__u8 icmpv6_code;
};
/**
 * struct ovs_key_arp - ARP part of a flow key.
 * @arp_sip: 32-bit big-endian address; presumably the sender IPv4 address.
 * @arp_tip: 32-bit big-endian address; presumably the target IPv4 address.
 * @arp_op: 16-bit big-endian value; presumably the ARP opcode.
 * @arp_sha: %ETH_ALEN octets; presumably the sender hardware address.
 * @arp_tha: %ETH_ALEN octets; presumably the target hardware address.
 *
 * NOTE(review): the member sizes do not add up to a multiple of four when
 * %ETH_ALEN is 6, so the compiler may append tail padding -- confirm how
 * users of this structure treat those bytes.
 */
struct ovs_key_arp {
__be32 arp_sip;
__be32 arp_tip;
__be16 arp_op;
__u8 arp_sha[ETH_ALEN];
__u8 arp_tha[ETH_ALEN];
};
/**
 * struct ovs_key_nd - neighbor discovery part of a flow key.
 * @nd_target: 128-bit value held as four big-endian 32-bit words;
 *	presumably the ND target address.
 * @nd_sll: %ETH_ALEN octets; presumably the source link-layer address.
 * @nd_tll: %ETH_ALEN octets; presumably the target link-layer address.
 */
struct ovs_key_nd {
__be32 nd_target[4];
__u8 nd_sll[ETH_ALEN];
__u8 nd_tll[ETH_ALEN];
};
/**
 * struct ovs_key_nd_extensions - payload of %OVS_KEY_ATTR_ND_EXTENSIONS.
 * @nd_reserved: 32-bit big-endian value; presumably the reserved word of the
 *	ND message -- confirm against the userspace flow code.
 * @nd_options_type: One octet; presumably the type of an ND option.
 *
 * Only declared when __KERNEL__ is not defined.
 *
 * NOTE(review): a 4-byte member followed by a single octet normally leaves
 * three bytes of compiler-inserted tail padding -- confirm that producers
 * zero the whole structure.
 */
#ifndef __KERNEL__
struct ovs_key_nd_extensions {
__be32 nd_reserved;
__u8 nd_options_type;
};
#endif
/* Size of the connection tracking labels: counted in 32-bit words, and in
 * bytes (words * sizeof(__u32)). */
#define OVS_CT_LABELS_LEN_32 4
#define OVS_CT_LABELS_LEN (OVS_CT_LABELS_LEN_32 * sizeof(__u32))
/**
 * struct ovs_key_ct_labels - connection tracking labels.
 * @ct_labels: The labels viewed as %OVS_CT_LABELS_LEN individual octets.
 * @ct_labels_32: The same storage viewed as %OVS_CT_LABELS_LEN_32 32-bit
 *	words.
 *
 * The two members share storage through an anonymous union, so a write
 * through one view is visible through the other.
 */
struct ovs_key_ct_labels {
union {
__u8 ct_labels[OVS_CT_LABELS_LEN];
__u32 ct_labels_32[OVS_CT_LABELS_LEN_32];
};
};
/**
 * enum ovs_nsh_key_attr - attribute types for the parts of an NSH key.
 * @OVS_NSH_KEY_ATTR_UNSPEC: Value 0; carries no payload description.
 * @OVS_NSH_KEY_ATTR_BASE: struct ovs_nsh_key_base.
 * @OVS_NSH_KEY_ATTR_MD1: struct ovs_nsh_key_md1.
 * @OVS_NSH_KEY_ATTR_MD2: variable-length octets.
 */
enum ovs_nsh_key_attr {
	OVS_NSH_KEY_ATTR_UNSPEC = 0,
	OVS_NSH_KEY_ATTR_BASE   = 1,	/* struct ovs_nsh_key_base. */
	OVS_NSH_KEY_ATTR_MD1    = 2,	/* struct ovs_nsh_key_md1. */
	OVS_NSH_KEY_ATTR_MD2    = 3,	/* variable-length octets. */
	__OVS_NSH_KEY_ATTR_MAX
};

/* Highest valid NSH key attribute type. */
#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
/**
 * struct ovs_nsh_key_base - payload of %OVS_NSH_KEY_ATTR_BASE.
 * @flags: One octet of flags.
 * @ttl: One octet; presumably the NSH TTL.
 * @mdtype: One octet; presumably the NSH metadata type.
 * @np: One octet; presumably the NSH next-protocol value.
 * @path_hdr: 32-bit big-endian value; presumably the NSH service path
 *	header -- confirm against the NSH parsing code.
 *
 * Four single-octet members precede the 32-bit member, so the structure is
 * 8 bytes long with no padding.
 */
struct ovs_nsh_key_base {
__u8 flags;
__u8 ttl;
__u8 mdtype;
__u8 np;
__be32 path_hdr;
};
/* Number of 32-bit context words in struct ovs_nsh_key_md1. */
#define NSH_MD1_CONTEXT_SIZE 4
/**
 * struct ovs_nsh_key_md1 - payload of %OVS_NSH_KEY_ATTR_MD1.
 * @context: %NSH_MD1_CONTEXT_SIZE big-endian 32-bit context words.
 */
struct ovs_nsh_key_md1 {
__be32 context[NSH_MD1_CONTEXT_SIZE];
};
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initially: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
/*
 * OVS_KEY_ATTR_CT_STATE flags
 *
 * One bit per flag, written as shifts so the bit position is explicit.
 */
/* Beginning of a new connection. */
#define OVS_CS_F_NEW (1 << 0)
/* Part of an existing connection. */
#define OVS_CS_F_ESTABLISHED (1 << 1)
/* Related to an established connection. */
#define OVS_CS_F_RELATED (1 << 2)
/* Flow is in the reply direction. */
#define OVS_CS_F_REPLY_DIR (1 << 3)
/* Could not track connection. */
#define OVS_CS_F_INVALID (1 << 4)
/* Conntrack has occurred. */
#define OVS_CS_F_TRACKED (1 << 5)
/* Packet's source address/port was mangled by NAT. */
#define OVS_CS_F_SRC_NAT (1 << 6)
/* Packet's destination address/port was mangled by NAT. */
#define OVS_CS_F_DST_NAT (1 << 7)

/* Both NAT flags together. */
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initially: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
datapath: Add original direction conntrack tuple to sw_flow_key. Upstream commit: commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc Author: Jarno Rajahalme <jarno@ovn.org> Date: Thu Feb 9 11:21:59 2017 -0800 openvswitch: Add original direction conntrack tuple to sw_flow_key. Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections reply packets (as well as original direction packets), and both direction packets of any related connections may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. 
While existence of a conntrack entry is required for admission of the return or related packets, policy changes can render connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using the original direction 5-tuple matching the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same or different than that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> This patch squashes in minimal amount of OVS userspace code to not break the build. Later patches contain the full userspace support. 
Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-08 17:18:22 -08:00
/**
 * struct ovs_key_ct_tuple_ipv4 - payload of %OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4.
 * @ipv4_src: Source address, big-endian.
 * @ipv4_dst: Destination address, big-endian.
 * @src_port: Source port, 16-bit big-endian.
 * @dst_port: Destination port, 16-bit big-endian.
 * @ipv4_proto: One octet; presumably the IP protocol number.
 *
 * NOTE(review): the members total 13 bytes while the structure is 4-byte
 * aligned, so tail padding is expected -- confirm how users size and
 * initialize this attribute.
 */
struct ovs_key_ct_tuple_ipv4 {
__be32 ipv4_src;
__be32 ipv4_dst;
__be16 src_port;
__be16 dst_port;
__u8 ipv4_proto;
};
/**
 * struct ovs_key_ct_tuple_ipv6 - IPv6 connection-tracking tuple.
 * @ipv6_src: Source IPv6 address, as four big-endian 32-bit words.
 * @ipv6_dst: Destination IPv6 address, as four big-endian 32-bit words.
 * @src_port: Source port (big-endian).
 * @dst_port: Destination port (big-endian).
 * @ipv6_proto: 8-bit protocol value.
 *
 * The member order is part of the userspace/kernel ABI; do not reorder.
 */
struct ovs_key_ct_tuple_ipv6 {
    __be32 ipv6_src[4]; /* Source address. */
    __be32 ipv6_dst[4]; /* Destination address. */
    __be16 src_port;    /* Source port. */
    __be16 dst_port;    /* Destination port. */
    __u8   ipv6_proto;  /* Protocol. */
};
/**
 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes that make up the flow
 * key.  Always present in notifications.  Required for every request other
 * than dumps.
 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes listing the
 * actions applied to packets that match the key.  Always present in
 * notifications.  Required for %OVS_FLOW_CMD_NEW requests and optional for
 * %OVS_FLOW_CMD_SET requests: an %OVS_FLOW_CMD_SET that omits
 * %OVS_FLOW_ATTR_ACTIONS leaves the actions unchanged, whereas an
 * %OVS_FLOW_ATTR_ACTIONS without any nested attributes clears them.
 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats holding the statistics for
 * this flow.  Present in notifications if the stats would be nonzero.
 * Ignored in requests.
 * @OVS_FLOW_ATTR_TCP_FLAGS: 8-bit value that is the OR of all the TCP flags
 * seen on packets in this flow.  Only present in notifications for TCP flows,
 * and only if it would be nonzero.  Ignored in requests.
 * @OVS_FLOW_ATTR_USED: 64-bit integer giving the time, in milliseconds on the
 * system monotonic clock, at which a packet was last processed for this flow.
 * Only present in notifications if a packet has been processed for this flow.
 * Ignored in requests.
 * @OVS_FLOW_ATTR_CLEAR: When present in a %OVS_FLOW_CMD_SET request, clears
 * the last-used time, the accumulated TCP flags, and the statistics for this
 * flow.  Otherwise ignored in requests.  Never present in notifications.
 * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes giving the mask bits
 * for a wildcarded flow match.  A mask bit of '1' requires an exact match on
 * the corresponding flow key bit; a mask bit of '0' wildcards it.  Omitting
 * attribute is treated as wildcarding all corresponding fields.  Optional for
 * all requests.  If not present, all flow key bits are exact match bits.
 * @OVS_FLOW_ATTR_PROBE: Marks the flow operation as a feature probe, for
 * which error logging should be suppressed.
 * @OVS_FLOW_ATTR_UFID: Value of 1 to 16 octets that uniquely identifies the
 * flow.  Causes the flow to be indexed by this value rather than by the value
 * of the %OVS_FLOW_ATTR_KEY attribute.  Optional for all requests.  Present
 * in notifications if the flow was created with this attribute.
 * @OVS_FLOW_ATTR_UFID_FLAGS: 32-bit value of OR'd %OVS_UFID_F_* flags that
 * select alternative semantics for flow installation and retrieval.  Optional
 * for all requests.
 * @OVS_FLOW_ATTR_PAD: Not described in this header.  NOTE(review): presumably
 * a padding attribute used to align 64-bit payloads -- confirm.
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_FLOW_* commands.
 */
enum ovs_flow_attr {
    OVS_FLOW_ATTR_UNSPEC,
    OVS_FLOW_ATTR_KEY,        /* Sequence of OVS_KEY_ATTR_* attributes. */
    OVS_FLOW_ATTR_ACTIONS,    /* Nested OVS_ACTION_ATTR_* attributes. */
    OVS_FLOW_ATTR_STATS,      /* struct ovs_flow_stats. */
    OVS_FLOW_ATTR_TCP_FLAGS,  /* 8-bit OR'd TCP flags. */
    OVS_FLOW_ATTR_USED,       /* u64 msecs last used in monotonic time. */
    OVS_FLOW_ATTR_CLEAR,      /* Flag to clear stats, tcp_flags, used. */
    OVS_FLOW_ATTR_MASK,       /* Sequence of OVS_KEY_ATTR_* attributes. */
    OVS_FLOW_ATTR_PROBE,      /* Flow operation is a feature probe, error
                               * logging should be suppressed. */
    OVS_FLOW_ATTR_UFID,       /* Variable length unique flow identifier. */
    OVS_FLOW_ATTR_UFID_FLAGS, /* u32 of OVS_UFID_F_*. */
    OVS_FLOW_ATTR_PAD,
    __OVS_FLOW_ATTR_MAX       /* Sentinel: one past the last attribute. */
};

/* Highest valid attribute number in enum ovs_flow_attr. */
#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
/**
 * Omit attributes for notifications.
 *
 * When a datapath request carries one of the OVS_UFID_F_OMIT_* flags below,
 * the datapath is allowed to leave the corresponding 'ovs_flow_attr' out of
 * its response.
 */
#define OVS_UFID_F_OMIT_KEY      (1 << 0)  /* Flow key may be omitted. */
#define OVS_UFID_F_OMIT_MASK     (1 << 1)  /* Flow mask may be omitted. */
#define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)  /* Actions may be omitted. */
/**
 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
 * %OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
 * %UINT32_MAX samples all packets and intermediate values sample intermediate
 * fractions of packets.
 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
 * Actions are passed as nested attributes.
 * @OVS_SAMPLE_ATTR_ARG: Only declared when __KERNEL__ is defined; carries a
 * &struct sample_arg.  It is listed after %__OVS_SAMPLE_ATTR_MAX, so it is
 * not counted by %OVS_SAMPLE_ATTR_MAX.
 *
 * Executes the specified actions with the given probability on a per-packet
 * basis.
 */
enum ovs_sample_attr {
    OVS_SAMPLE_ATTR_UNSPEC,
    OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
    OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
    __OVS_SAMPLE_ATTR_MAX,

#ifdef __KERNEL__
    OVS_SAMPLE_ATTR_ARG          /* struct sample_arg */
#endif
};

/* Highest attribute number in enum ovs_sample_attr that precedes the
 * __OVS_SAMPLE_ATTR_MAX sentinel. */
#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
datapath: openvswitch: Optimize sample action for the clone use cases Upstream commit: openvswitch: Optimize sample action for the clone use cases With the introduction of open flow 'clone' action, the OVS user space can now translate the 'clone' action into kernel datapath 'sample' action, with 100% probability, to ensure that the clone semantics, which is that the packet seen by the clone action is the same as the packet seen by the action after clone, is faithfully carried out in the datapath. While the sample action in the datapath has the matching semantics, its implementation is only optimized for its original use. Specifically, there are two limitations: First, there is a 3-level nesting restriction, enforced at the flow downloading time. This limit turns out to be too restrictive for the 'clone' use case. Second, the implementation avoids a recursive call only if the sample action list has a single userspace action. The main optimization implemented in this series removes the static nesting limit check and instead implements a run-time recursion limit check and recursion avoidance similar to that of the 'recirc' action. This optimization solves both #1 and #2 issues above. One related optimization attempts to avoid copying the flow key as long as the enclosed actions do not change the flow key. The detection is performed only once at the flow downloading time. Another related optimization is to rewrite the action list at flow downloading time in order to save the fast path from parsing the sample action list in its original form repeatedly. Signed-off-by: Andy Zhou <azhou@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> Upstream: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Andy Zhou <azhou@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-04-06 13:05:48 -07:00
#ifdef __KERNEL__
/*
 * struct sample_arg - argument type named by OVS_SAMPLE_ATTR_ARG.
 * Only declared when __KERNEL__ is defined.
 */
struct sample_arg {
    /* When true, actions in sample will not change flow keys.
     * False otherwise. */
    bool exec;

    /* Same value as 'OVS_SAMPLE_ATTR_PROBABILITY'. */
    u32 probability;
};
#endif
/**
 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID that the %OVS_PACKET_CMD_ACTION
 * message is sent to.  Required.
 * @OVS_USERSPACE_ATTR_USERDATA: Optional.  When present, its variable-length
 * argument is copied into the %OVS_PACKET_CMD_ACTION message as
 * %OVS_PACKET_ATTR_USERDATA.
 * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: Optional.  When present, u32 output
 * port to get tunnel info.
 * @OVS_USERSPACE_ATTR_ACTIONS: Optional.  When present, send actions with
 * upcall.
 */
enum ovs_userspace_attr {
    OVS_USERSPACE_ATTR_UNSPEC,
    OVS_USERSPACE_ATTR_PID,             /* u32 Netlink PID to receive
                                         * upcalls. */
    OVS_USERSPACE_ATTR_USERDATA,        /* Optional user-specified cookie. */
    OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port
                                         * to get tunnel info. */
    OVS_USERSPACE_ATTR_ACTIONS,         /* Optional flag to get actions. */
    __OVS_USERSPACE_ATTR_MAX            /* Sentinel: one past the last
                                         * attribute. */
};

/* Highest valid attribute number in enum ovs_userspace_attr. */
#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
/*
 * struct ovs_action_trunc - argument carrying a packet truncation length.
 * @max_len: Max packet size in bytes.
 */
struct ovs_action_trunc {
    uint32_t max_len;   /* Max packet size in bytes. */
};
/**
 * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
 * @mpls_lse: MPLS label stack entry to push.
 * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
 *
 * @mpls_ethertype must be either %ETH_P_MPLS_UC (MPLS unicast) or
 * %ETH_P_MPLS_MC (MPLS multicast).  Any other value is rejected.
 */
struct ovs_action_push_mpls {
    __be32 mpls_lse;        /* Label stack entry. */
    __be16 mpls_ethertype;  /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
};
2021-11-29 11:52:05 +05:30
/**
 * struct ovs_action_add_mpls - %OVS_ACTION_ATTR_ADD_MPLS action
 * argument.
 * @mpls_lse: MPLS label stack entry to push.
 * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
 * @tun_flags: MPLS tunnel attributes.
 *
 * @mpls_ethertype must be either %ETH_P_MPLS_UC (MPLS unicast) or
 * %ETH_P_MPLS_MC (MPLS multicast).  Any other value is rejected.
 */
struct ovs_action_add_mpls {
    __be32 mpls_lse;        /* Label stack entry. */
    __be16 mpls_ethertype;  /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
    __u16  tun_flags;       /* MPLS tunnel attributes. */
};
/*
 * Flag to specify the place of insertion of the MPLS header:
 *   - when false, the MPLS header will be inserted at the start of the
 *     packet;
 *   - when true, the MPLS header will be inserted at the start of the l3
 *     header.
 *
 * NOTE(review): presumably tested against 'tun_flags' of
 * struct ovs_action_add_mpls -- confirm.
 */
#define OVS_MPLS_L3_TUNNEL_FLAG_MASK (1 << 0)
/**
 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
 * @vlan_tpid: Tag protocol identifier (TPID) to push.
 * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
 * (but it will not be set in the 802.1Q header that is pushed).
 *
 * @vlan_tpid is typically %ETH_P_8021Q or %ETH_P_8021AD.  Only TPID values
 * that the kernel module itself parses as 802.1Q or 802.1AD headers are
 * accepted, so that %OVS_ACTION_ATTR_PUSH_VLAN followed by
 * %OVS_ACTION_ATTR_POP_VLAN does not have surprising results.
 */
struct ovs_action_push_vlan {
    __be16 vlan_tpid;   /* 802.1Q or 802.1ad TPID. */
    __be16 vlan_tci;    /* 802.1Q TCI (VLAN ID and priority). */
};
/* Data path hash algorithm for computing Datapath hash.
 *
 * The algorithm type only specifies which fields of a flow are used as part
 * of the hash.  Each datapath is free to use its own hash algorithm, and the
 * resulting hash value is opaque to the user space daemon.
 */
enum ovs_hash_alg {
    OVS_HASH_ALG_L4,
#ifndef __KERNEL__
    OVS_HASH_ALG_SYM_L4,    /* Only declared when __KERNEL__ is undefined. */
#endif
    __OVS_HASH_MAX          /* Sentinel: number of algorithms declared. */
};
/*
 * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
 * @hash_alg: Algorithm used to compute hash prior to recirculation; one of
 * enum ovs_hash_alg.
 * @hash_basis: basis used for computing hash.
 */
struct ovs_action_hash {
    uint32_t hash_alg;      /* One of ovs_hash_alg. */
    uint32_t hash_basis;    /* Basis for the hash computation. */
};
#ifndef __KERNEL__
/* Capacity, in bytes, of the 'header' buffer in struct ovs_action_push_tnl. */
#define TNL_PUSH_HEADER_SIZE 512

/*
 * struct ovs_action_push_tnl - %OVS_ACTION_ATTR_TUNNEL_PUSH
 * @tnl_port: To identify tunnel port to pass header info.
 * @out_port: Physical port to send encapsulated packet.
 * @header_len: Length of the header to be pushed.
 * @tnl_type: This is only required to format this header.  Otherwise
 * ODP layer can not parse %header.
 * @header: Partial header for the tunnel.  Tunnel push action can use
 * this header to build final header according to actual packet parameters.
 *
 * Only declared when __KERNEL__ is undefined.
 */
struct ovs_action_push_tnl {
    odp_port_t tnl_port;
    odp_port_t out_port;
    uint32_t header_len;
    uint32_t tnl_type;     /* For logging. */

    /* Declared as uint32_t[] rather than uint8_t[] so that the compiler
     * knows the buffer is suitably aligned when it is cast to more strictly
     * aligned packet header structures.  The total capacity is still
     * TNL_PUSH_HEADER_SIZE bytes. */
    uint32_t header[TNL_PUSH_HEADER_SIZE / 4];
};
#endif
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
/**
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
* @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
* table. This allows future packets for the same connection to be identified
* as 'established' or 'related'. The flow key for the current packet will
* retain the pre-commit connection state.
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
* mask, the corresponding bit in the value is copied to the connection
* tracking mark field in the connection.
* @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
* mask. For each bit set in the mask, the corresponding bit in the value is
* copied to the connection tracking label field in the connection.
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
* @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address
* translation (NAT) on the packet.
* @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but instead of doing
* nothing if the connection is already committed will check that the current
* packet is in conntrack entry's original direction. If directionality does
* not match, will delete the existing conntrack entry and create a new one.
* @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types
* (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to
* zero, the corresponding event type is not generated. Default behavior
* depends on system configuration, but typically all event types are
* generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot
* of events. Explicitly passing this attribute allows limiting the updates
* received to the events of interest. The bit 1 << IPCT_NEW, 1 <<
* IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to
* be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups,
* respectively. Remaining bits control the changes for which an event is
* delivered on the NFNLGRP_CONNTRACK_UPDATE group.
datapath: Add support for conntrack timeout policy This patch adds support for specifying a timeout policy for a connection in connection tracking system in kernel datapath. The timeout policy will be attached to a connection when the connection is committed to conntrack. This patch introduces a new odp field OVS_CT_ATTR_TIMEOUT in the ct action that specifies the timeout policy in the datapath. In the following patch, during the upcall process, the vswitchd will use the ct_zone to look up the corresponding timeout policy and fill OVS_CT_ATTR_TIMEOUT if it is available. The datapath code is from the following two net-next upstream commits. Upstream commit: commit 06bd2bdf19d2f3d22731625e1a47fa1dff5ac407 Author: Yi-Hung Wei <yihung.wei@gmail.com> Date: Tue Mar 26 11:31:14 2019 -0700 openvswitch: Add timeout support to ct action Add support for fine-grain timeout support to conntrack action. The new OVS_CT_ATTR_TIMEOUT attribute of the conntrack action specifies a timeout to be associated with this connection. If no timeout is specified, it acts as is, that is the default timeout for the connection will be automatically applied. Example usage: $ nfct timeout add timeout_1 inet tcp syn_sent 100 established 200 $ ovs-ofctl add-flow br0 in_port=1,ip,tcp,action=ct(commit,timeout=timeout_1) CC: Pravin Shelar <pshelar@ovn.org> CC: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> commit 6d670497e01803b486aa72cc1a718401ab986896 Author: Dan Carpenter <dan.carpenter@oracle.com> Date: Tue Apr 2 09:53:14 2019 +0300 openvswitch: use after free in __ovs_ct_free_action() We free "ct_info->ct" and then use it on the next line when we pass it to nf_ct_destroy_timeout(). This patch swaps the order to avoid the use after free. 
Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Acked-by: Yi-Hung Wei <yihung.wei@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> Signed-off-by: Justin Pettit <jpettit@ovn.org>
2019-08-28 15:14:28 -07:00
* @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout.
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
*/
enum ovs_ct_attr {
    OVS_CT_ATTR_UNSPEC,
    OVS_CT_ATTR_COMMIT,       /* No argument, commits connection. */
    OVS_CT_ATTR_ZONE,         /* u16 zone id. */
    OVS_CT_ATTR_MARK,         /* mark to associate with this connection. */
    OVS_CT_ATTR_LABELS,       /* label to associate with this connection. */
    OVS_CT_ATTR_HELPER,       /* netlink helper to assist detection of
                               * related connections. */
    OVS_CT_ATTR_NAT,          /* Nested OVS_NAT_ATTR_* */
    OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
    OVS_CT_ATTR_EVENTMASK,    /* u32 mask of IPCT_* events. */
    OVS_CT_ATTR_TIMEOUT,      /* Associate timeout with this connection for
                               * fine-grain timeout tuning. */
    __OVS_CT_ATTR_MAX         /* Sentinel: one past the last attribute. */
};

/* Highest valid OVS_CT_ATTR_* value. */
#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
/**
 * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
 * @addresses: Source and destination MAC addresses.
 */
struct ovs_action_push_eth {
    struct ovs_key_ethernet addresses;
};
/**
* enum ovs_nat_attr - Attributes for %OVS_CT_ATTR_NAT.
*
* @OVS_NAT_ATTR_SRC: Flag for Source NAT (mangle source address/port).
* @OVS_NAT_ATTR_DST: Flag for Destination NAT (mangle destination
* address/port). Only one of (@OVS_NAT_ATTR_SRC, @OVS_NAT_ATTR_DST) may be
* specified. Effective only for packets for ct_state NEW connections.
* Committed connections are mangled by the NAT action according to the
* committed NAT type regardless of the flags specified. As a corollary, a NAT
* action without a NAT type flag will only mangle packets of committed
* connections. The following NAT attributes only apply for NEW connections,
* and they may be included only when the CT action has the @OVS_CT_ATTR_COMMIT
* flag and either @OVS_NAT_ATTR_SRC or @OVS_NAT_ATTR_DST is also included.
* @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
* @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
* @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
* @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
* @OVS_NAT_ATTR_PERSISTENT: Flag for persistent IP mapping across reboots
* @OVS_NAT_ATTR_PROTO_HASH: Flag for pseudo random L4 port mapping (MD5)
* @OVS_NAT_ATTR_PROTO_RANDOM: Flag for fully randomized L4 port mapping
*/
enum ovs_nat_attr {
    OVS_NAT_ATTR_UNSPEC = 0,
    OVS_NAT_ATTR_SRC,          /* Flag: source NAT. */
    OVS_NAT_ATTR_DST,          /* Flag: destination NAT. */
    OVS_NAT_ATTR_IP_MIN,       /* struct in_addr or struct in6_addr. */
    OVS_NAT_ATTR_IP_MAX,       /* struct in_addr or struct in6_addr. */
    OVS_NAT_ATTR_PROTO_MIN,    /* u16 L4 lower boundary (port). */
    OVS_NAT_ATTR_PROTO_MAX,    /* u16 L4 upper boundary (port). */
    OVS_NAT_ATTR_PERSISTENT,   /* Flag: persistent IP mapping. */
    OVS_NAT_ATTR_PROTO_HASH,   /* Flag: pseudo random L4 port mapping. */
    OVS_NAT_ATTR_PROTO_RANDOM, /* Flag: fully randomized L4 port mapping. */
    __OVS_NAT_ATTR_MAX,        /* Sentinel: one past the last attribute. */
};

/* Highest valid OVS_NAT_ATTR_* value. */
#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
Add a new OVS action check_pkt_larger This patch adds a new action 'check_pkt_larger' which checks if the packet is larger than the given size and stores the result in the destination register. Usage: check_pkt_larger(len)->REGISTER Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next; This patch makes use of the new datapath action - 'check_pkt_len' which was recently added in the commit [1]. At the start of ovs-vswitchd, datapath is probed for this action. If the datapath action is present, then 'check_pkt_larger' makes use of this datapath action. Datapath action 'check_pkt_len' takes these nlattrs * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions to apply if the packet length is greater than the specified 'pkt_len' * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested actions to apply if the packet length is lesser or equal to the specified 'pkt_len'. Let's say we have these flows added to an OVS bridge br-int table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1) table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3 table=1, priority=100,in_port=1,ip,actions=output:4 Then the action 'check_pkt_larger' will be translated as - check_pkt_len(size=100,gt(3),le(4)) datapath will check the packet length and if the packet length is greater than 100, it will output to port 3, else it will output to port 4. In case, datapath doesn't support 'check_pkt_len' action, the OVS action 'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added. This OVS action is intended to be used by OVN to check the packet length and generate an ICMP packet with type 3, code 4 and next hop mtu in the logical router pipeline if the MTU of the physical interface is lesser than the packet length. 
More information can be found here [2] [1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 [2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Suggested-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Numan Siddique <nusiddiq@redhat.com> CC: Ben Pfaff <blp@ovn.org> CC: Gregory Rose <gvrose8192@gmail.com> Acked-by: Mark Michelson <mmichels@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
/*
* enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN.
*
* @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for.
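* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_*
* actions to apply if the packet length is greater than the length given in
* %OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested OVS_ACTION_ATTR_*
* actions to apply if the packet length is less than or equal to the length
* given in %OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.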
*/
enum ovs_check_pkt_len_attr {
    OVS_CHECK_PKT_LEN_ATTR_UNSPEC,
    OVS_CHECK_PKT_LEN_ATTR_PKT_LEN,               /* u16 packet length to
                                                   * check for. */
    OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER,    /* Nested actions applied
                                                   * when the packet is longer
                                                   * than PKT_LEN. */
    OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, /* Nested actions applied
                                                   * when the packet length is
                                                   * less than or equal to
                                                   * PKT_LEN. */
    __OVS_CHECK_PKT_LEN_ATTR_MAX,                 /* Sentinel: one past the
                                                   * last public attribute. */

#ifdef __KERNEL__
    /* Kernel-only attribute.  It is declared after the sentinel, so it is
     * not counted in OVS_CHECK_PKT_LEN_ATTR_MAX. */
    OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */
#endif
};

/* Highest valid public OVS_CHECK_PKT_LEN_ATTR_* value. */
#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
datapath: Add a new action check_pkt_len Upstream commit: commit 4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 Author: Numan Siddique <nusiddiq@redhat.com> Date: Tue Mar 26 06:13:46 2019 +0530 net: openvswitch: Add a new action check_pkt_len This patch adds a new action - 'check_pkt_len' which checks the packet length and executes a set of actions if the packet length is greater than the specified length or executes another set of actions if the packet length is lesser or equal to. This action takes below nlattrs * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER - Nested actions to apply if the packet length is greater than the specified 'pkt_len' * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested actions to apply if the packet length is lesser or equal to the specified 'pkt_len'. The main use case for adding this action is to solve the packet drops because of MTU mismatch in OVN virtual networking solution. When a VM (which belongs to a logical switch of OVN) sends a packet destined to go via the gateway router and if the nic which provides external connectivity, has a lesser MTU, OVS drops the packet if the packet length is greater than this MTU. With the help of this action, OVN will check the packet length and if it is greater than the MTU size, it will generate an ICMP packet (type 3, code 4) and includes the next hop mtu in it so that the sender can fragment the packets. Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Suggested-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Numan Siddique <nusiddiq@redhat.com> CC: Gregory Rose <gvrose8192@gmail.com> CC: Pravin B Shelar <pshelar@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Tested-by: Greg Rose <gvrose8192@gmail.com> Reviewed-by: Greg Rose <gvrose8192@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Use of 'nla_parse_strict()' (in validate_and_copy_check_len()) is available only in recent kernels. 
So changed it to 'nla_parse_nested()'. Signed-off-by: Numan Siddique <nusiddiq@redhat.com> Tested-by: Greg Rose <gvrose8192@gmail.com> Reviewed-by: Greg Rose <gvrose8192@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:43 +05:30
#ifdef __KERNEL__
/* Payload of the kernel-only OVS_CHECK_PKT_LEN_ATTR_ARG attribute. */
struct check_pkt_len_arg {
    u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. */
    bool exec_for_greater; /* When true, actions in IF_GREATER will
                            * not change flow keys. False otherwise.
                            */
    bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL
                                 * will not change flow keys. False
                                 * otherwise.
                                 */
};
#endif
/**
* enum ovs_action_attr - Action types.
*
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
* @OVS_ACTION_ATTR_TRUNC: Output packet to port with truncated packet size.
* @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
* %OVS_USERSPACE_ATTR_* attributes.
* @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
* onto the packet.
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
* from the packet.
* @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
* the nested %OVS_SAMPLE_ATTR_* attributes.
* @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
* single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
* value.
* @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A
* nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
* and a mask. For every bit set in the mask, the corresponding bit value
* is copied from the value to the packet header field, rest of the bits are
* left unchanged. The non-masked value bits must be passed in as zeroes.
* Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
* @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
* @OVS_ACTION_ATTR_HASH: Compute and set flow hash value.
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
* top of the packet's MPLS label stack. Set the ethertype of the
* encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
* indicate the new packet contents.
* @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
* packet's MPLS label stack. Set the encapsulating frame's ethertype to
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. 
Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. 
Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
* @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
* entries in the flow key.
* @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
* packet.
* @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the packet.
* @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet.
* @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
* @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
* type may not be changed.
*
* @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated
* from the @OVS_ACTION_ATTR_SET.
* @OVS_ACTION_ATTR_TUNNEL_PUSH: Push tunnel header described by struct
* ovs_action_push_tnl.
* @OVS_ACTION_ATTR_TUNNEL_POP: Lookup tunnel port by port-no passed and pop
* tunnel header.
* @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
* packet, or modify the packet (e.g., change the DSCP field).
* @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
* actions without affecting the original packet and key.
Add a new OVS action check_pkt_larger This patch adds a new action 'check_pkt_larger' which checks if the packet is larger than the given size and stores the result in the destination register. Usage: check_pkt_larger(len)->REGISTER Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next; This patch makes use of the new datapath action - 'check_pkt_len' which was recently added in the commit [1]. At the start of ovs-vswitchd, datapath is probed for this action. If the datapath action is present, then 'check_pkt_larger' makes use of this datapath action. Datapath action 'check_pkt_len' takes these nlattrs * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions to apply if the packet length is greater than the specified 'pkt_len' * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested actions to apply if the packet length is lesser or equal to the specified 'pkt_len'. Let's say we have these flows added to an OVS bridge br-int table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1) table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3 table=1, priority=100,in_port=1,ip,actions=output:4 Then the action 'check_pkt_larger' will be translated as - check_pkt_len(size=100,gt(3),le(4)) datapath will check the packet length and if the packet length is greater than 100, it will output to port 3, else it will output to port 4. In case, datapath doesn't support 'check_pkt_len' action, the OVS action 'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added. This OVS action is intended to be used by OVN to check the packet length and generate an ICMP packet with type 3, code 4 and next hop mtu in the logical router pipeline if the MTU of the physical interface is lesser than the packet length. 
More information can be found here [2] [1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 [2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Suggested-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Numan Siddique <nusiddiq@redhat.com> CC: Ben Pfaff <blp@ovn.org> CC: Gregory Rose <gvrose8192@gmail.com> Acked-by: Mark Michelson <mmichels@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
* @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
* of actions if greater than the specified packet length, else execute
* another set of actions.
2021-11-29 11:52:05 +05:30
* @OVS_ACTION_ATTR_ADD_MPLS: Push a new MPLS label stack entry at the
* start of the packet or at the start of the l3 header depending on the value
* of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
* argument.
userspace: Improved packet drop statistics. Currently OVS maintains explicit packet drop/error counters only on port level. Packets that are dropped as part of normal OpenFlow processing are counted in flow stats of “drop” flows or as table misses in table stats. These can only be interpreted by controllers that know the semantics of the configured OpenFlow pipeline. Without that knowledge, it is impossible for an OVS user to obtain e.g. the total number of packets dropped due to OpenFlow rules. Furthermore, there are numerous other reasons for which packets can be dropped by OVS slow path that are not related to the OpenFlow pipeline. The generated datapath flow entries include a drop action to avoid further expensive upcalls to the slow path, but subsequent packets dropped by the datapath are not accounted anywhere. Finally, the datapath itself drops packets in certain error situations. Also, these drops are today not accounted for.This makes it difficult for OVS users to monitor packet drop in an OVS instance and to alert a management system in case of a unexpected increase of such drops. Also OVS trouble-shooters face difficulties in analysing packet drops. With this patch we implement following changes to address the issues mentioned above. 1. Identify and account all the silent packet drop scenarios 2. Display these drops in ovs-appctl coverage/show Co-authored-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Co-authored-by: Keshav Gupta <keshugupta1@gmail.com> Signed-off-by: Anju Thomas <anju.thomas@ericsson.com> Signed-off-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Signed-off-by: Keshav Gupta <keshugupta1@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com Acked-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2019-12-18 05:48:12 +01:00
* @OVS_ACTION_ATTR_DROP: Explicit drop action.
*/
enum ovs_action_attr {
	OVS_ACTION_ATTR_UNSPEC,
	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
	OVS_ACTION_ATTR_SET,	      /* One nested OVS_KEY_ATTR_*. */
	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
	OVS_ACTION_ATTR_SAMPLE,	      /* Nested OVS_SAMPLE_ATTR_*. */
	OVS_ACTION_ATTR_RECIRC,	      /* u32 recirc_id. */
	OVS_ACTION_ATTR_HASH,	      /* struct ovs_action_hash. */
	OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
	OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
	OVS_ACTION_ATTR_SET_MASKED,   /* One nested OVS_KEY_ATTR_* including
				       * data immediately followed by a mask.
				       * The data must be zero for the unmasked
				       * bits. */
	OVS_ACTION_ATTR_CT,	      /* Nested OVS_CT_ATTR_*. */
	OVS_ACTION_ATTR_TRUNC,	      /* struct ovs_action_trunc. */
	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
	OVS_ACTION_ATTR_CT_CLEAR,     /* No argument. */
	OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
	OVS_ACTION_ATTR_METER,	      /* u32 meter number. */
	OVS_ACTION_ATTR_CLONE,	      /* Nested OVS_CLONE_ATTR_*. */
	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */

	/* The attributes below are declared only when __KERNEL__ is not
	 * defined; they are numbered right after OVS_ACTION_ATTR_ADD_MPLS
	 * and still precede __OVS_ACTION_ATTR_MAX. */
#ifndef __KERNEL__
	OVS_ACTION_ATTR_TUNNEL_PUSH,  /* struct ovs_action_push_tnl. */
	OVS_ACTION_ATTR_TUNNEL_POP,   /* u32 port number. */
	OVS_ACTION_ATTR_DROP,	      /* u32 xlate_error. */
	OVS_ACTION_ATTR_LB_OUTPUT,    /* u32 bond-id. */
#endif

	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
				       * from userspace. */

	/* Declared only when __KERNEL__ is defined, and deliberately placed
	 * after __OVS_ACTION_ATTR_MAX so that it lies outside the range
	 * covered by OVS_ACTION_ATTR_MAX. */
#ifdef __KERNEL__
	OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked
					* set action converted from
					* OVS_ACTION_ATTR_SET. */
#endif
};

/* Highest action attribute value that precedes __OVS_ACTION_ATTR_MAX. */
#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
/* Meters.
 *
 * Name strings and version number for the meter interface.  The family and
 * the multicast group use the same string.
 * NOTE(review): presumably these are the Generic Netlink family / multicast
 * group names, as the identifiers suggest -- confirm against the code that
 * registers them.
 */
#define OVS_METER_FAMILY "ovs_meter"
#define OVS_METER_MCGROUP "ovs_meter"
#define OVS_METER_VERSION 0x1
/* Commands of the meter family.  Values are written out explicitly so the
 * numbering is visible at a glance; they match the implicit numbering. */
enum ovs_meter_cmd {
	OVS_METER_CMD_UNSPEC   = 0,
	OVS_METER_CMD_FEATURES = 1,	/* Query the features the datapath supports. */
	OVS_METER_CMD_SET      = 2,	/* Create a meter or change an existing one. */
	OVS_METER_CMD_DEL      = 3,	/* Remove a meter. */
	OVS_METER_CMD_GET      = 4	/* Retrieve the statistics of a meter. */
};
/* Attributes carried by meter commands.  Values are written out explicitly;
 * only the trailing sentinel keeps its implicit value. */
enum ovs_meter_attr {
	OVS_METER_ATTR_UNSPEC     = 0,
	OVS_METER_ATTR_ID         = 1,	/* u32: meter ID within the datapath. */
	OVS_METER_ATTR_KBPS       = 2,	/* No argument.  Present: units are
					 * kilobits per second.  Absent: units
					 * are packets per second. */
	OVS_METER_ATTR_STATS      = 3,	/* struct ovs_flow_stats of the meter. */
	OVS_METER_ATTR_BANDS      = 4,	/* Nested attributes, one set per band. */
	OVS_METER_ATTR_USED       = 5,	/* u64: last use, msecs, monotonic time. */
	OVS_METER_ATTR_CLEAR      = 6,	/* Flag: clear stats and used. */
	OVS_METER_ATTR_MAX_METERS = 7,	/* u32: number of meters supported. */
	OVS_METER_ATTR_MAX_BANDS  = 8,	/* u32: max number of bands per meter. */
	OVS_METER_ATTR_PAD        = 9,
	__OVS_METER_ATTR_MAX		/* Sentinel: one past the last attribute. */
};

/* Highest valid meter attribute value. */
#define OVS_METER_ATTR_MAX (__OVS_METER_ATTR_MAX - 1)
/* Attributes describing a single meter band.  Values are written out
 * explicitly; only the trailing sentinel keeps its implicit value. */
enum ovs_band_attr {
	OVS_BAND_ATTR_UNSPEC = 0,
	OVS_BAND_ATTR_TYPE   = 1,	/* u32: an OVS_METER_BAND_TYPE_* constant. */
	OVS_BAND_ATTR_RATE   = 2,	/* u32: band rate, in the meter's units. */
	OVS_BAND_ATTR_BURST  = 3,	/* u32: burst size, in the meter's units. */
	OVS_BAND_ATTR_STATS  = 4,	/* struct ovs_flow_stats of the band. */
	__OVS_BAND_ATTR_MAX		/* Sentinel: one past the last attribute. */
};

/* Highest valid band attribute value. */
#define OVS_BAND_ATTR_MAX (__OVS_BAND_ATTR_MAX - 1)
/* Kinds of meter band; used as the value of OVS_BAND_ATTR_TYPE. */
enum ovs_meter_band_type {
	OVS_METER_BAND_TYPE_UNSPEC = 0,
	OVS_METER_BAND_TYPE_DROP   = 1,	/* Packets exceeding the band are dropped. */
	__OVS_METER_BAND_TYPE_MAX	/* Sentinel: one past the last type. */
};

/* Highest valid meter band type value. */
#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1)
/* Conntrack limit.
 *
 * Name strings and version number for the conntrack-limit interface.  The
 * family and the multicast group use the same string.
 * NOTE(review): presumably these are the Generic Netlink family / multicast
 * group names, as the identifiers suggest -- confirm against the code that
 * registers them.
 */
#define OVS_CT_LIMIT_FAMILY "ovs_ct_limit"
#define OVS_CT_LIMIT_MCGROUP "ovs_ct_limit"
#define OVS_CT_LIMIT_VERSION 0x1
/* Commands of the conntrack-limit family.  Values are written out
 * explicitly; they match the implicit numbering. */
enum ovs_ct_limit_cmd {
	OVS_CT_LIMIT_CMD_UNSPEC = 0,
	OVS_CT_LIMIT_CMD_SET    = 1,	/* Create a ct limit or change an existing one. */
	OVS_CT_LIMIT_CMD_DEL    = 2,	/* Remove a ct limit. */
	OVS_CT_LIMIT_CMD_GET    = 3	/* Retrieve a ct limit. */
};
/* Attributes carried by conntrack-limit commands. */
enum ovs_ct_limit_attr {
	OVS_CT_LIMIT_ATTR_UNSPEC     = 0,
	OVS_CT_LIMIT_ATTR_ZONE_LIMIT = 1,	/* Nested struct ovs_zone_limit. */
	__OVS_CT_LIMIT_ATTR_MAX			/* Sentinel: one past the last attribute. */
};

/* Highest valid conntrack-limit attribute value. */
#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
/* Sentinel zone id (-1).
 * NOTE(review): going by the name, it selects the default zone limit rather
 * than a specific zone -- confirm against the users of struct ovs_zone_limit.
 * The replacement list is parenthesized so the negative constant expands as
 * a single operand in any expression. */
#define OVS_ZONE_LIMIT_DEFAULT_ZONE (-1)
/* Payload of OVS_CT_LIMIT_ATTR_ZONE_LIMIT: one record per conntrack zone.
 * The member order and types are part of the binary interface; do not
 * reorder or resize them. */
struct ovs_zone_limit {
/* Zone the record applies to.  Signed, unlike the other members.
 * NOTE(review): presumably so that OVS_ZONE_LIMIT_DEFAULT_ZONE (-1) can be
 * stored here -- confirm against the users of this struct. */
int zone_id;
/* NOTE(review): presumably the configured connection limit for the zone --
 * confirm. */
__u32 limit;
/* NOTE(review): presumably the current number of connections in the zone --
 * confirm. */
__u32 count;
};
#define OVS_CLONE_ATTR_EXEC 0 /* Specify a u32 value. When nonzero, the
                               * actions in the clone will not change
                               * flow keys; when zero, they may.
                               */
#endif /* _LINUX_OPENVSWITCH_H */