ovs/lib/odp-execute.c

/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
* Copyright (c) 2013 Simon Horman
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "odp-execute.h"
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <stdlib.h>
#include <string.h>
#include "dp-packet.h"
#include "dpif.h"
#include "netlink.h"
#include "odp-netlink.h"
#include "odp-util.h"
#include "packets.h"
#include "flow.h"
#include "unaligned.h"
#include "util.h"
#include "csum.h"
/* Masked copy of an ethernet address. 'src' is already properly masked. */
static void
ether_addr_copy_masked(struct eth_addr *dst, const struct eth_addr src,
const struct eth_addr mask)
{
int i;
for (i = 0; i < ARRAY_SIZE(dst->be16); i++) {
dst->be16[i] = src.be16[i] | (dst->be16[i] & ~mask.be16[i]);
}
}
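
/* Sets the Ethernet source and destination addresses of 'packet'.  If 'mask'
 * is nonnull, only the address bits whose mask bits are set are changed. */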
static void
odp_eth_set_addrs(struct dp_packet *packet, const struct ovs_key_ethernet *key,
const struct ovs_key_ethernet *mask)
{
struct eth_header *eh = dp_packet_l2(packet);
if (eh) {
if (!mask) {
eh->eth_src = key->eth_src;
eh->eth_dst = key->eth_dst;
} else {
ether_addr_copy_masked(&eh->eth_src, key->eth_src, mask->eth_src);
ether_addr_copy_masked(&eh->eth_dst, key->eth_dst, mask->eth_dst);
}
}
}
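
/* Applies the masked IPv4 set given by 'key' and 'mask' to 'packet'.  Each
 * header field (source address, destination address, TOS, TTL) is rewritten
 * only if its mask is nonzero and the masked value differs from what is
 * already in the packet, so checksums are adjusted incrementally only when
 * something actually changes. */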
static void
odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
const struct ovs_key_ipv4 *mask)
{
struct ip_header *nh = dp_packet_l3(packet);
ovs_be32 ip_src_nh;
ovs_be32 ip_dst_nh;
ovs_be32 new_ip_src;
ovs_be32 new_ip_dst;
uint8_t new_tos;
uint8_t new_ttl;
if (mask->ipv4_src) {
ip_src_nh = get_16aligned_be32(&nh->ip_src);
new_ip_src = key->ipv4_src | (ip_src_nh & ~mask->ipv4_src);
if (ip_src_nh != new_ip_src) {
packet_set_ipv4_addr(packet, &nh->ip_src, new_ip_src);
}
}
if (mask->ipv4_dst) {
ip_dst_nh = get_16aligned_be32(&nh->ip_dst);
new_ip_dst = key->ipv4_dst | (ip_dst_nh & ~mask->ipv4_dst);
if (ip_dst_nh != new_ip_dst) {
packet_set_ipv4_addr(packet, &nh->ip_dst, new_ip_dst);
}
}
if (mask->ipv4_tos) {
new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
if (nh->ip_tos != new_tos) {
nh->ip_csum = recalc_csum16(nh->ip_csum,
htons((uint16_t) nh->ip_tos),
htons((uint16_t) new_tos));
nh->ip_tos = new_tos;
}
}
if (OVS_LIKELY(mask->ipv4_ttl)) {
new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
htons(new_ttl << 8));
nh->ip_ttl = new_ttl;
}
}
}
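
/* Stores in 'masked' the merge of 'addr' and the address in 'old' under
 * 'mask': bits set in 'mask' are taken from 'addr', the remaining bits are
 * preserved from 'old'.  Returns 'masked'. */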
static struct in6_addr *
mask_ipv6_addr(const ovs_16aligned_be32 *old, const struct in6_addr *addr,
const struct in6_addr *mask, struct in6_addr *masked)
{
#ifdef s6_addr32
for (int i = 0; i < 4; i++) {
masked->s6_addr32[i] = addr->s6_addr32[i]
| (get_16aligned_be32(&old[i]) & ~mask->s6_addr32[i]);
}
#else
const uint8_t *old8 = (const uint8_t *)old;
for (int i = 0; i < 16; i++) {
masked->s6_addr[i] = addr->s6_addr[i] | (old8[i] & ~mask->s6_addr[i]);
}
#endif
return masked;
}
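
/* Applies the masked IPv6 set given by 'key' and 'mask' to 'packet': source
 * and destination addresses, traffic class, flow label, and hop limit. */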
static void
odp_set_ipv6(struct dp_packet *packet, const struct ovs_key_ipv6 *key,
const struct ovs_key_ipv6 *mask)
{
struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
struct in6_addr sbuf, dbuf;
uint8_t old_tc = ntohl(get_16aligned_be32(&nh->ip6_flow)) >> 20;
ovs_be32 old_fl = get_16aligned_be32(&nh->ip6_flow) & htonl(0xfffff);
packet_set_ipv6(
packet,
mask_ipv6_addr(nh->ip6_src.be32, &key->ipv6_src, &mask->ipv6_src,
&sbuf),
mask_ipv6_addr(nh->ip6_dst.be32, &key->ipv6_dst, &mask->ipv6_dst,
&dbuf),
key->ipv6_tclass | (old_tc & ~mask->ipv6_tclass),
key->ipv6_label | (old_fl & ~mask->ipv6_label),
key->ipv6_hlimit | (nh->ip6_hlim & ~mask->ipv6_hlimit));
}
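
/* Applies the masked TCP port set given by 'key' and 'mask' to 'packet'.
 * Does nothing unless the packet carries a complete TCP header (a later IP
 * fragment, for example, does not). */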
static void
odp_set_tcp(struct dp_packet *packet, const struct ovs_key_tcp *key,
const struct ovs_key_tcp *mask)
{
struct tcp_header *th = dp_packet_l4(packet);
if (OVS_LIKELY(th && dp_packet_get_tcp_payload(packet))) {
packet_set_tcp_port(packet,
key->tcp_src | (th->tcp_src & ~mask->tcp_src),
key->tcp_dst | (th->tcp_dst & ~mask->tcp_dst));
}
}
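
/* Applies the masked UDP port set given by 'key' and 'mask' to 'packet'.
 * Does nothing unless the packet carries a complete UDP header. */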
static void
odp_set_udp(struct dp_packet *packet, const struct ovs_key_udp *key,
const struct ovs_key_udp *mask)
{
struct udp_header *uh = dp_packet_l4(packet);
if (OVS_LIKELY(uh && dp_packet_get_udp_payload(packet))) {
packet_set_udp_port(packet,
key->udp_src | (uh->udp_src & ~mask->udp_src),
key->udp_dst | (uh->udp_dst & ~mask->udp_dst));
}
}
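
/* Applies the masked SCTP port set given by 'key' and 'mask' to 'packet'.
 * Does nothing unless the packet carries a complete SCTP header. */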
static void
odp_set_sctp(struct dp_packet *packet, const struct ovs_key_sctp *key,
const struct ovs_key_sctp *mask)
{
struct sctp_header *sh = dp_packet_l4(packet);
if (OVS_LIKELY(sh && dp_packet_get_sctp_payload(packet))) {
packet_set_sctp_port(packet,
key->sctp_src | (sh->sctp_src & ~mask->sctp_src),
key->sctp_dst | (sh->sctp_dst & ~mask->sctp_dst));
}
}
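
/* Parses the tunnel attribute 'a' into 'tun_key'.  The attribute is expected
 * to be well formed; a parse error here is treated as a bug. */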
static void
odp_set_tunnel_action(const struct nlattr *a, struct flow_tnl *tun_key)
{
enum odp_key_fitness fitness;
fitness = odp_tun_key_from_attr(a, tun_key);
ovs_assert(fitness != ODP_FIT_ERROR);
}
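
/* Sets the ARP opcode and the hardware and protocol addresses of 'packet'
 * from 'key'.  If 'mask' is nonnull, only the masked bits are changed. */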
static void
set_arp(struct dp_packet *packet, const struct ovs_key_arp *key,
const struct ovs_key_arp *mask)
{
struct arp_eth_header *arp = dp_packet_l3(packet);
if (!mask) {
arp->ar_op = key->arp_op;
arp->ar_sha = key->arp_sha;
put_16aligned_be32(&arp->ar_spa, key->arp_sip);
arp->ar_tha = key->arp_tha;
put_16aligned_be32(&arp->ar_tpa, key->arp_tip);
} else {
ovs_be32 ar_spa = get_16aligned_be32(&arp->ar_spa);
ovs_be32 ar_tpa = get_16aligned_be32(&arp->ar_tpa);
arp->ar_op = key->arp_op | (arp->ar_op & ~mask->arp_op);
ether_addr_copy_masked(&arp->ar_sha, key->arp_sha, mask->arp_sha);
put_16aligned_be32(&arp->ar_spa,
key->arp_sip | (ar_spa & ~mask->arp_sip));
ether_addr_copy_masked(&arp->ar_tha, key->arp_tha, mask->arp_tha);
put_16aligned_be32(&arp->ar_tpa,
key->arp_tip | (ar_tpa & ~mask->arp_tip));
}
}
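
/* Applies the masked Neighbor Discovery set given by 'key' and 'mask' to
 * 'packet': the target address and, when the corresponding ND option is
 * present, the source or target link-layer address. */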
static void
odp_set_nd(struct dp_packet *packet, const struct ovs_key_nd *key,
const struct ovs_key_nd *mask)
{
const struct ovs_nd_msg *ns = dp_packet_l4(packet);
const struct ovs_nd_opt *nd_opt = dp_packet_get_nd_payload(packet);
if (OVS_LIKELY(ns && nd_opt)) {
int bytes_remain = dp_packet_l4_size(packet) - sizeof(*ns);
struct in6_addr tgt_buf;
struct eth_addr sll_buf = eth_addr_zero;
struct eth_addr tll_buf = eth_addr_zero;
while (bytes_remain >= ND_OPT_LEN && nd_opt->nd_opt_len != 0) {
if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR
&& nd_opt->nd_opt_len == 1) {
sll_buf = nd_opt->nd_opt_mac;
ether_addr_copy_masked(&sll_buf, key->nd_sll, mask->nd_sll);
/* A packet can only contain one SLL or TLL option */
break;
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LINKADDR
&& nd_opt->nd_opt_len == 1) {
tll_buf = nd_opt->nd_opt_mac;
ether_addr_copy_masked(&tll_buf, key->nd_tll, mask->nd_tll);
/* A packet can only contain one SLL or TLL option */
break;
}
nd_opt += nd_opt->nd_opt_len;
bytes_remain -= nd_opt->nd_opt_len * ND_OPT_LEN;
}
packet_set_nd(packet,
mask_ipv6_addr(ns->target.be32, &key->nd_target,
&mask->nd_target, &tgt_buf),
sll_buf,
tll_buf);
}
}
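
/* Executes an OVS_ACTION_ATTR_SET action: the single OVS_KEY_ATTR_*
 * attribute in 'a' completely replaces the corresponding packet header
 * fields or metadata. */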
static void
odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
{
enum ovs_key_attr type = nl_attr_type(a);
const struct ovs_key_ipv4 *ipv4_key;
const struct ovs_key_ipv6 *ipv6_key;
struct pkt_metadata *md = &packet->md;
switch (type) {
case OVS_KEY_ATTR_PRIORITY:
md->skb_priority = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_TUNNEL:
odp_set_tunnel_action(a, &md->tunnel);
break;
case OVS_KEY_ATTR_SKB_MARK:
md->pkt_mark = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_ETHERNET:
odp_eth_set_addrs(packet, nl_attr_get(a), NULL);
break;
case OVS_KEY_ATTR_IPV4:
ipv4_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4));
packet_set_ipv4(packet, ipv4_key->ipv4_src,
ipv4_key->ipv4_dst, ipv4_key->ipv4_tos,
ipv4_key->ipv4_ttl);
break;
case OVS_KEY_ATTR_IPV6:
ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
packet_set_ipv6(packet, &ipv6_key->ipv6_src, &ipv6_key->ipv6_dst,
ipv6_key->ipv6_tclass, ipv6_key->ipv6_label,
ipv6_key->ipv6_hlimit);
break;
case OVS_KEY_ATTR_TCP:
if (OVS_LIKELY(dp_packet_get_tcp_payload(packet))) {
const struct ovs_key_tcp *tcp_key
= nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp));
packet_set_tcp_port(packet, tcp_key->tcp_src,
tcp_key->tcp_dst);
}
break;
case OVS_KEY_ATTR_UDP:
if (OVS_LIKELY(dp_packet_get_udp_payload(packet))) {
const struct ovs_key_udp *udp_key
= nl_attr_get_unspec(a, sizeof(struct ovs_key_udp));
packet_set_udp_port(packet, udp_key->udp_src,
udp_key->udp_dst);
}
break;
case OVS_KEY_ATTR_SCTP:
if (OVS_LIKELY(dp_packet_get_sctp_payload(packet))) {
const struct ovs_key_sctp *sctp_key
= nl_attr_get_unspec(a, sizeof(struct ovs_key_sctp));
packet_set_sctp_port(packet, sctp_key->sctp_src,
sctp_key->sctp_dst);
}
break;
case OVS_KEY_ATTR_MPLS:
set_mpls_lse(packet, nl_attr_get_be32(a));
break;
case OVS_KEY_ATTR_ARP:
set_arp(packet, nl_attr_get(a), NULL);
break;
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
if (OVS_LIKELY(dp_packet_get_icmp_payload(packet))) {
const struct ovs_key_icmp *icmp_key
= nl_attr_get_unspec(a, sizeof(struct ovs_key_icmp));
packet_set_icmp(packet, icmp_key->icmp_type, icmp_key->icmp_code);
}
break;
case OVS_KEY_ATTR_ND:
if (OVS_LIKELY(dp_packet_get_nd_payload(packet))) {
const struct ovs_key_nd *nd_key
= nl_attr_get_unspec(a, sizeof(struct ovs_key_nd));
packet_set_nd(packet, &nd_key->nd_target, nd_key->nd_sll,
nd_key->nd_tll);
}
break;
case OVS_KEY_ATTR_DP_HASH:
md->dp_hash = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_RECIRC_ID:
md->recirc_id = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_UNSPEC:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IN_PORT:
case OVS_KEY_ATTR_VLAN:
case OVS_KEY_ATTR_TCP_FLAGS:
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
case __OVS_KEY_ATTR_MAX:
default:
OVS_NOT_REACHED();
}
}
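
/* In a masked set action the netlink attribute payload carries the key
 * immediately followed by a mask of the same type; get_mask() returns a
 * pointer to that mask. */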
#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
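
/* Executes an OVS_ACTION_ATTR_SET_MASKED action: only the bits that are set
 * in the attribute's mask are written into the packet headers or metadata. */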
static void
odp_execute_masked_set_action(struct dp_packet *packet,
const struct nlattr *a)
{
struct pkt_metadata *md = &packet->md;
enum ovs_key_attr type = nl_attr_type(a);
struct mpls_hdr *mh;
switch (type) {
case OVS_KEY_ATTR_PRIORITY:
md->skb_priority = nl_attr_get_u32(a)
| (md->skb_priority & ~*get_mask(a, uint32_t));
break;
case OVS_KEY_ATTR_SKB_MARK:
md->pkt_mark = nl_attr_get_u32(a)
| (md->pkt_mark & ~*get_mask(a, uint32_t));
break;
case OVS_KEY_ATTR_ETHERNET:
odp_eth_set_addrs(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_ethernet));
break;
case OVS_KEY_ATTR_IPV4:
odp_set_ipv4(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_ipv4));
break;
case OVS_KEY_ATTR_IPV6:
odp_set_ipv6(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_ipv6));
break;
case OVS_KEY_ATTR_TCP:
odp_set_tcp(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_tcp));
break;
case OVS_KEY_ATTR_UDP:
odp_set_udp(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_udp));
break;
case OVS_KEY_ATTR_SCTP:
odp_set_sctp(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_sctp));
break;
case OVS_KEY_ATTR_MPLS:
mh = dp_packet_l2_5(packet);
if (mh) {
put_16aligned_be32(&mh->mpls_lse, nl_attr_get_be32(a)
| (get_16aligned_be32(&mh->mpls_lse)
& ~*get_mask(a, ovs_be32)));
}
break;
case OVS_KEY_ATTR_ARP:
set_arp(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_arp));
break;
case OVS_KEY_ATTR_ND:
odp_set_nd(packet, nl_attr_get(a),
get_mask(a, struct ovs_key_nd));
break;
case OVS_KEY_ATTR_DP_HASH:
md->dp_hash = nl_attr_get_u32(a)
| (md->dp_hash & ~*get_mask(a, uint32_t));
break;
case OVS_KEY_ATTR_RECIRC_ID:
md->recirc_id = nl_attr_get_u32(a)
| (md->recirc_id & ~*get_mask(a, uint32_t));
break;
case OVS_KEY_ATTR_TUNNEL: /* Masked data not supported for tunnel. */
case OVS_KEY_ATTR_UNSPEC:
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IN_PORT:
case OVS_KEY_ATTR_VLAN:
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_TCP_FLAGS:
case __OVS_KEY_ATTR_MAX:
default:
OVS_NOT_REACHED();
}
}
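
/* Executes an OVS_ACTION_ATTR_SAMPLE action: with the configured
 * probability, runs the nested action list on 'packet'; otherwise the packet
 * is left alone (and freed if 'steal' is true). */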
static void
odp_execute_sample(void *dp, struct dp_packet *packet, bool steal,
const struct nlattr *action,
odp_execute_cb dp_execute_action)
{
const struct nlattr *subactions = NULL;
const struct nlattr *a;
struct dp_packet_batch pb;
size_t left;
NL_NESTED_FOR_EACH_UNSAFE (a, left, action) {
int type = nl_attr_type(a);
switch ((enum ovs_sample_attr) type) {
case OVS_SAMPLE_ATTR_PROBABILITY:
if (random_uint32() >= nl_attr_get_u32(a)) {
if (steal) {
dp_packet_delete(packet);
}
return;
}
break;
case OVS_SAMPLE_ATTR_ACTIONS:
subactions = a;
break;
case OVS_SAMPLE_ATTR_UNSPEC:
case __OVS_SAMPLE_ATTR_MAX:
default:
OVS_NOT_REACHED();
}
}
dp_packet_batch_init_packet(&pb, packet);
odp_execute_actions(dp, &pb, steal, nl_attr_get(subactions),
nl_attr_get_size(subactions), dp_execute_action);
}
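
/* Executes an OVS_ACTION_ATTR_CLONE action: runs the nested 'actions' on
 * 'packet', or on a copy of it when we do not own the packet, so that their
 * effects do not propagate beyond the clone action. */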
static void
odp_execute_clone(void *dp, struct dp_packet *packet, bool steal,
const struct nlattr *actions,
odp_execute_cb dp_execute_action)
{
struct dp_packet_batch pb;
if (!steal) {
/* The 'actions' may modify the packet, but the modification
* should not propagate beyond this clone action. Make a copy
* of the packet in case we don't own the packet, so that the
* 'actions' are only applied to the clone. 'odp_execute_actions'
* will free the clone. */
packet = dp_packet_clone(packet);
}
dp_packet_batch_init_packet(&pb, packet);
odp_execute_actions(dp, &pb, true, nl_attr_get(actions),
nl_attr_get_size(actions), dp_execute_action);
}
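
/* Returns true if action 'a' can only be carried out by the datapath itself
 * (output, tunnel push/pop, userspace, recirculation, connection tracking)
 * and must therefore be handed to the 'dp_execute_action' callback, false if
 * this module can execute it directly. */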
static bool
requires_datapath_assistance(const struct nlattr *a)
{
enum ovs_action_attr type = nl_attr_type(a);
switch (type) {
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_CT:
return true;
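/* The remaining action types can be executed directly on the packet batch
 * in userspace, without datapath assistance. */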
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_PUSH_MPLS:
case OVS_ACTION_ATTR_POP_MPLS:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_CLONE:
return false;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
return false;
}
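
/* Executes the 'actions_len' bytes of datapath actions in 'actions' on the
 * packets in 'batch'.  Actions that require datapath assistance are handed to
 * 'dp_execute_action'; the remaining types are applied directly below.  If
 * 'steal' is true, ownership of the packets passes to this function: either
 * the last action consumes the batch or it is freed before returning.
 *
 * Minimal caller sketch (illustrative only; 'my_dp' and 'my_cb' are
 * hypothetical placeholders):
 *
 *     odp_execute_actions(my_dp, &batch, true, actions, actions_len, my_cb);
 */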
void
odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
const struct nlattr *actions, size_t actions_len,
odp_execute_cb dp_execute_action)
{
struct dp_packet *packet;
const struct nlattr *a;
unsigned int left;
NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
int type = nl_attr_type(a);
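/* 'left' still counts the current attribute, so this is the last action
 * when nothing follows its aligned length. */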
bool last_action = (left <= NLA_ALIGN(a->nla_len));
if (requires_datapath_assistance(a)) {
if (dp_execute_action) {
/* Allow 'dp_execute_action' to steal the packet data if we do
* not need it any more. */
bool may_steal = steal && last_action;
dp_execute_action(dp, batch, a, may_steal);
if (last_action) {
/* We do not need to free the packets. 'dp_execute_action' has
 * stolen them. */
return;
}
}
continue;
}
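/* Everything below is executed here, directly on the whole batch. */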
switch ((enum ovs_action_attr) type) {
case OVS_ACTION_ATTR_HASH: {
const struct ovs_action_hash *hash_act = nl_attr_get(a);
/* Calculate a hash value directly. This might not match the
* value computed by the datapath, but it is much less expensive,
* and the current use case (bonding) does not require a strict
* match to work properly. */
if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
struct flow flow;
uint32_t hash;
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
flow_extract(packet, &flow);
hash = flow_hash_5tuple(&flow, hash_act->hash_basis);
packet->md.dp_hash = hash;
}
} else {
/* Assert on unknown hash algorithm. */
OVS_NOT_REACHED();
}
break;
}
case OVS_ACTION_ATTR_PUSH_VLAN: {
const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
}
break;
}
case OVS_ACTION_ATTR_POP_VLAN:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
eth_pop_vlan(packet);
}
break;
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
push_mpls(packet, mpls->mpls_ethertype, mpls->mpls_lse);
}
break;
}
case OVS_ACTION_ATTR_POP_MPLS:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
pop_mpls(packet, nl_attr_get_be16(a));
}
break;
case OVS_ACTION_ATTR_SET:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
odp_execute_set_action(packet, nl_attr_get(a));
}
break;
case OVS_ACTION_ATTR_SET_MASKED:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
odp_execute_masked_set_action(packet, nl_attr_get(a));
}
break;
case OVS_ACTION_ATTR_SAMPLE:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
odp_execute_sample(dp, packet, steal && last_action, a,
dp_execute_action);
}
if (last_action) {
/* We do not need to free the packets. odp_execute_sample() has
 * stolen them. */
return;
}
break;
case OVS_ACTION_ATTR_TRUNC: {
const struct ovs_action_trunc *trunc =
nl_attr_get_unspec(a, sizeof *trunc);
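/* dp_packet_set_cutlen() only records the requested cut length; the
 * truncation itself is expected to be applied later, on output. */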
batch->trunc = true;
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
dp_packet_set_cutlen(packet, trunc->max_len);
}
break;
}
case OVS_ACTION_ATTR_CLONE:
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
odp_execute_clone(dp, packet, steal && last_action, a,
dp_execute_action);
}
if (last_action) {
/* We do not need to free the packets. odp_execute_clone() has
* stolen them. */
return;
}
break;
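/* These action types require datapath assistance and were dispatched
 * above, so they must not reach this switch. */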
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
}
dp_packet_delete_batch(batch, steal);
}