ovs/lib/tc.c

/*
* Copyright (c) 2009-2017 Nicira, Inc.
* Copyright (c) 2016 Mellanox Technologies, Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "tc.h"
#include <errno.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_csum.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <linux/tc_act/tc_mpls.h>
#include <linux/tc_act/tc_pedit.h>
#include <linux/tc_act/tc_skbedit.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <linux/tc_act/tc_vlan.h>
#include <linux/tc_act/tc_ct.h>
#include <linux/gen_stats.h>
#include <net/if.h>
#include <unistd.h>
#include "byte-order.h"
#include "netlink-socket.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/util.h"
#include "openvswitch/vlog.h"
#include "packets.h"
#include "timeval.h"
#include "unaligned.h"
#define MAX_PEDIT_OFFSETS 32
#ifndef TCM_IFINDEX_MAGIC_BLOCK
#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
#endif
#ifndef TCA_DUMP_FLAGS_TERSE
#define TCA_DUMP_FLAGS_TERSE (1 << 0)
#endif
#if TCA_MAX < 15
#define TCA_CHAIN 11
#define TCA_INGRESS_BLOCK 13
#define TCA_DUMP_FLAGS 15
#endif
#ifndef RTM_GETCHAIN
#define RTM_GETCHAIN 102
#endif
VLOG_DEFINE_THIS_MODULE(tc);
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
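/* A single extended pedit key: the header it applies to and the pedit
 * command (set or add) applied to the 32-bit word it covers. */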
struct tc_pedit_key_ex {
enum pedit_header_type htype;
enum pedit_cmd cmd;
};
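/* Maps one field of struct tc_flower_key to the pedit header type, byte
 * offset within that header, and field size needed to express a rewrite of
 * the field; see flower_pedit_map below.  'boundary_shift' handles fields
 * that do not start on a byte boundary within the header (e.g. the IPv6
 * traffic class). */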
struct flower_key_to_pedit {
enum pedit_header_type htype;
int offset;
int flower_offset;
int size;
int boundary_shift;
};
static struct flower_key_to_pedit flower_pedit_map[] = {
{
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
12,
offsetof(struct tc_flower_key, ipv4.ipv4_src),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
16,
offsetof(struct tc_flower_key, ipv4.ipv4_dst),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
8,
offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
1,
offsetof(struct tc_flower_key, ipv4.rewrite_tos),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_tos),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
7,
offsetof(struct tc_flower_key, ipv6.rewrite_hlimit),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.rewrite_hlimit),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
8,
offsetof(struct tc_flower_key, ipv6.ipv6_src),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
24,
offsetof(struct tc_flower_key, ipv6.ipv6_dst),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
0,
offsetof(struct tc_flower_key, ipv6.rewrite_tclass),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.rewrite_tclass),
4
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
6,
offsetof(struct tc_flower_key, src_mac),
MEMBER_SIZEOF(struct tc_flower_key, src_mac),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
0,
offsetof(struct tc_flower_key, dst_mac),
MEMBER_SIZEOF(struct tc_flower_key, dst_mac),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
12,
offsetof(struct tc_flower_key, eth_type),
MEMBER_SIZEOF(struct tc_flower_key, eth_type),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
0,
offsetof(struct tc_flower_key, tcp_src),
MEMBER_SIZEOF(struct tc_flower_key, tcp_src),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
2,
offsetof(struct tc_flower_key, tcp_dst),
MEMBER_SIZEOF(struct tc_flower_key, tcp_dst),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
0,
offsetof(struct tc_flower_key, udp_src),
MEMBER_SIZEOF(struct tc_flower_key, udp_src),
0
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
2,
offsetof(struct tc_flower_key, udp_dst),
MEMBER_SIZEOF(struct tc_flower_key, udp_dst),
0
},
};
static inline int
csum_update_flag(struct tc_flower *flower,
enum pedit_header_type htype);
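/* Initializes 'request' and appends a tcmsg header for a netlink request of
 * the given 'type' (e.g. RTM_NEWQDISC) targeting 'ifindex'.  Returns the
 * tcmsg so that the caller can fill in tcm_handle, tcm_parent, etc. */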
struct tcmsg *
tc_make_request(int ifindex, int type, unsigned int flags,
struct ofpbuf *request)
{
struct tcmsg *tcmsg;
ofpbuf_init(request, 512);
nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
tcmsg->tcm_family = AF_UNSPEC;
tcmsg->tcm_ifindex = ifindex;
/* Caller should fill in tcmsg->tcm_handle. */
/* Caller should fill in tcmsg->tcm_parent. */
return tcmsg;
}
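/* Builds a tc classifier request addressed by 'id': the target is either a
 * shared block or a port's ingress/egress hook, and the priority, protocol,
 * handle and (optionally) chain are likewise taken from 'id'. */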
static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type,
int type, unsigned int flags,
struct ofpbuf *request)
{
int ifindex = id->block_id ? TCM_IFINDEX_MAGIC_BLOCK : id->ifindex;
uint32_t ingress_parent = id->block_id ? : TC_INGRESS_PARENT;
struct tcmsg *tcmsg;
tcmsg = tc_make_request(ifindex, type, flags, request);
tcmsg->tcm_parent = (id->hook == TC_EGRESS) ?
TC_EGRESS_PARENT : ingress_parent;
tcmsg->tcm_info = tc_make_handle(id->prio, eth_type);
tcmsg->tcm_handle = id->handle;
if (id->chain) {
nl_msg_put_u32(request, TCA_CHAIN, id->chain);
}
}
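/* Sends 'request' over NETLINK_ROUTE and frees it.  If 'replyp' is nonnull,
 * stores the kernel's reply in '*replyp'; the caller must eventually free it
 * with ofpbuf_delete().  Returns 0 on success, a positive errno otherwise. */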
int
tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
{
int error = nl_transact(NETLINK_ROUTE, request, replyp);
ofpbuf_uninit(request);
return error;
}
/* Adds or deletes a root qdisc on device with specified ifindex.
*
* The tc_qdisc_hook parameter determines if the qdisc is added on device
* ingress or egress.
*
* If tc_qdisc_hook is TC_INGRESS, this function is equivalent to running the
* following when 'add' is true:
* /sbin/tc qdisc add dev <devname> handle ffff: ingress
*
* This function is equivalent to running the following when 'add' is false:
* /sbin/tc qdisc del dev <devname> handle ffff: ingress
*
* If tc_qdisc_hook is TC_EGRESS, this function is equivalent to:
* /sbin/tc qdisc (add|del) dev <devname> handle ffff: clsact
*
* Where <devname> is the name of the device with the specified ifindex.
*
* The configuration and stats may be seen with the following command:
* /sbin/tc -s qdisc show dev <devname>
*
* If block_id is greater than 0, then the ingress qdisc is added to a block.
* In this case, it is equivalent to running (when 'add' is true):
* /sbin/tc qdisc add dev <devname> ingress_block <block_id> ingress
*
* Returns 0 if successful, otherwise a positive errno value.
*/
int
tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id,
enum tc_qdisc_hook hook)
{
struct ofpbuf request;
struct tcmsg *tcmsg;
int error;
int type = add ? RTM_NEWQDISC : RTM_DELQDISC;
int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
tcmsg = tc_make_request(ifindex, type, flags, &request);
if (hook == TC_EGRESS) {
tcmsg->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
tcmsg->tcm_parent = TC_H_CLSACT;
nl_msg_put_string(&request, TCA_KIND, "clsact");
} else {
tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
tcmsg->tcm_parent = TC_H_INGRESS;
nl_msg_put_string(&request, TCA_KIND, "ingress");
}
nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
if (hook == TC_INGRESS && block_id) {
nl_msg_put_u32(&request, TCA_INGRESS_BLOCK, block_id);
}
error = tc_transact(&request, NULL);
if (error) {
/* If we're deleting the qdisc, don't worry about some of the
* error conditions. */
if (!add && (error == ENOENT || error == EINVAL)) {
return 0;
}
return error;
}
return 0;
}
static const struct nl_policy tca_policy[] = {
[TCA_KIND] = { .type = NL_A_STRING, .optional = false, },
[TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
[TCA_CHAIN] = { .type = NL_A_U32, .optional = true, },
[TCA_STATS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_stats), .optional = true, },
[TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
};
static const struct nl_policy tca_chain_policy[] = {
[TCA_CHAIN] = { .type = NL_A_U32, .optional = false, },
};
static const struct nl_policy tca_flower_policy[] = {
[TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
.optional = true, },
[TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN, .optional = true, },
[TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN, .optional = true, },
[TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
[TCA_FLOWER_KEY_ARP_SIP] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ARP_TIP] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ARP_SHA] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ARP_THA] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ARP_OP] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ARP_SHA_MASK] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ARP_THA_MASK] = { .type = NL_A_UNSPEC,
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
[TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
[TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_SCTP_SRC] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_SCTP_DST] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_MPLS_TTL] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_MPLS_TC] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_MPLS_BOS] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32,
.optional = true, },
[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32,
.optional = true, },
[TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_U16,
.optional = true, },
[TCA_FLOWER_KEY_FLAGS] = { .type = NL_A_BE32, .optional = true, },
[TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NL_A_BE32, .optional = true, },
[TCA_FLOWER_KEY_IP_TTL] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_IP_TOS] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NL_A_U16,
.optional = true, },
[TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NL_A_U16,
.optional = true, },
[TCA_FLOWER_KEY_CVLAN_ID] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_CVLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_KEY_CVLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_ENC_IP_TOS] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NL_A_NESTED, .optional = true, },
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NL_A_NESTED,
.optional = true, },
[TCA_FLOWER_KEY_CT_STATE] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_CT_ZONE] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NL_A_U16, .optional = true, },
[TCA_FLOWER_KEY_CT_MARK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_CT_MARK_MASK] = { .type = NL_A_U32, .optional = true, },
[TCA_FLOWER_KEY_CT_LABELS] = { .type = NL_A_U128, .optional = true, },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NL_A_U128,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NL_A_U8,
.optional = true, },
[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NL_A_U8,
.optional = true, },
};
static const struct nl_policy tca_flower_terse_policy[] = {
[TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
[TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
};
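/* The nl_parse_flower_*() helpers below each copy one group of flower match
 * attributes into 'flower'.  They test only the mask attribute: whenever the
 * kernel dumps a mask it also dumps the corresponding key. */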
static void
nl_parse_flower_arp(struct nlattr **attrs, struct tc_flower *flower)
{
const struct eth_addr *eth;
if (attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]) {
flower->key.arp.spa =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP]);
flower->mask.arp.spa =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]);
}
if (attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]) {
flower->key.arp.tpa =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP]);
flower->mask.arp.tpa =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]);
}
if (attrs[TCA_FLOWER_KEY_ARP_SHA_MASK]) {
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA], ETH_ALEN);
memcpy(&flower->key.arp.sha, eth, sizeof flower->key.arp.sha);
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA_MASK], ETH_ALEN);
memcpy(&flower->mask.arp.sha, eth, sizeof flower->mask.arp.sha);
}
if (attrs[TCA_FLOWER_KEY_ARP_THA_MASK]) {
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA], ETH_ALEN);
memcpy(&flower->key.arp.tha, eth, sizeof flower->key.arp.tha);
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA_MASK], ETH_ALEN);
memcpy(&flower->mask.arp.tha, eth, sizeof flower->mask.arp.tha);
}
if (attrs[TCA_FLOWER_KEY_ARP_OP_MASK]) {
flower->key.arp.opcode =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP]);
flower->mask.arp.opcode =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP_MASK]);
}
}
static void
nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower)
{
const struct eth_addr *eth;
if (attrs[TCA_FLOWER_KEY_ETH_SRC_MASK]) {
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN);
memcpy(&flower->key.src_mac, eth, sizeof flower->key.src_mac);
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC_MASK], ETH_ALEN);
memcpy(&flower->mask.src_mac, eth, sizeof flower->mask.src_mac);
}
if (attrs[TCA_FLOWER_KEY_ETH_DST_MASK]) {
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN);
memcpy(&flower->key.dst_mac, eth, sizeof flower->key.dst_mac);
eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST_MASK], ETH_ALEN);
memcpy(&flower->mask.dst_mac, eth, sizeof flower->mask.dst_mac);
}
}
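/* The kernel reports the MPLS TTL, BOS, TC and label as separate attributes;
 * fold them back into the single label stack entry (LSE) that OVS uses. */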
static void
nl_parse_flower_mpls(struct nlattr **attrs, struct tc_flower *flower)
{
uint8_t ttl, tc, bos;
uint32_t label;
if (!eth_type_mpls(flower->key.eth_type)) {
return;
}
flower->key.encap_eth_type[0] =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
flower->key.mpls_lse = 0;
flower->mask.mpls_lse = 0;
if (attrs[TCA_FLOWER_KEY_MPLS_TTL]) {
ttl = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_MPLS_TTL]);
set_mpls_lse_ttl(&flower->key.mpls_lse, ttl);
set_mpls_lse_ttl(&flower->mask.mpls_lse, 0xff);
}
if (attrs[TCA_FLOWER_KEY_MPLS_BOS]) {
bos = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_MPLS_BOS]);
set_mpls_lse_bos(&flower->key.mpls_lse, bos);
set_mpls_lse_bos(&flower->mask.mpls_lse, 0xff);
}
if (attrs[TCA_FLOWER_KEY_MPLS_TC]) {
tc = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_MPLS_TC]);
set_mpls_lse_tc(&flower->key.mpls_lse, tc);
set_mpls_lse_tc(&flower->mask.mpls_lse, 0xff);
}
if (attrs[TCA_FLOWER_KEY_MPLS_LABEL]) {
label = nl_attr_get_u32(attrs[TCA_FLOWER_KEY_MPLS_LABEL]);
set_mpls_lse_label(&flower->key.mpls_lse, htonl(label));
set_mpls_lse_label(&flower->mask.mpls_lse, OVS_BE32_MAX);
}
}
static void
nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
{
ovs_be16 encap_ethtype;
if (!eth_type_vlan(flower->key.eth_type)) {
return;
}
flower->key.encap_eth_type[0] =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff);
if (attrs[TCA_FLOWER_KEY_VLAN_ID]) {
flower->key.vlan_id[0] =
nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]);
flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT;
}
if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) {
flower->key.vlan_prio[0] =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]);
flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT;
}
if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
return;
}
encap_ethtype = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
if (!eth_type_vlan(encap_ethtype)) {
return;
}
flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0];
flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff);
flower->key.encap_eth_type[0] = encap_ethtype;
if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) {
flower->key.vlan_id[1] =
nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]);
flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT;
}
if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) {
flower->key.vlan_prio[1] =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]);
flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT;
}
}
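/* Parses the nested Geneve options of a flower tunnel match.  The kernel
 * emits each option as a CLASS, TYPE, DATA attribute triplet, in that order,
 * so the parser tracks the previous attribute type to validate the
 * sequence. */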
static int
nl_parse_geneve_key(const struct nlattr *in_nlattr,
struct tun_metadata *metadata)
{
struct geneve_opt *opt = NULL;
const struct ofpbuf *msg;
uint16_t last_opt_type;
struct nlattr *nla;
struct ofpbuf buf;
size_t left;
int cnt;
nl_attr_get_nested(in_nlattr, &buf);
msg = &buf;
last_opt_type = TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC;
cnt = 0;
NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(msg, 0, 0), msg->size) {
uint16_t type = nl_attr_type(nla);
switch (type) {
case TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS:
if (cnt && last_opt_type != TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA) {
VLOG_ERR_RL(&error_rl, "failed to parse tun options class");
return EINVAL;
}
opt = &metadata->opts.gnv[cnt];
opt->opt_class = nl_attr_get_be16(nla);
cnt += sizeof(struct geneve_opt) / 4;
metadata->present.len += sizeof(struct geneve_opt);
last_opt_type = TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS;
break;
case TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE:
if (last_opt_type != TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS) {
VLOG_ERR_RL(&error_rl, "failed to parse tun options type");
return EINVAL;
}
opt->type = nl_attr_get_u8(nla);
last_opt_type = TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE;
break;
case TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA:
if (last_opt_type != TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE) {
VLOG_ERR_RL(&error_rl, "failed to parse tun options data");
return EINVAL;
}
opt->length = nl_attr_get_size(nla) / 4;
memcpy(opt + 1, nl_attr_get_unspec(nla, 1), opt->length * 4);
cnt += opt->length;
metadata->present.len += opt->length * 4;
last_opt_type = TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA;
break;
}
}
if (last_opt_type != TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA) {
VLOG_ERR_RL(&error_rl, "failed to parse tun options without data");
return EINVAL;
}
return 0;
}
static int
nl_parse_flower_tunnel_opts(struct nlattr *options,
struct tun_metadata *metadata)
{
const struct ofpbuf *msg;
struct nlattr *nla;
struct ofpbuf buf;
size_t left;
int err;
nl_attr_get_nested(options, &buf);
msg = &buf;
NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(msg, 0, 0), msg->size) {
uint16_t type = nl_attr_type(nla);
switch (type) {
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
err = nl_parse_geneve_key(nla, metadata);
if (err) {
return err;
}
break;
}
}
return 0;
}
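/* Verifies that every Geneve option in 'key' has the same length as the
 * corresponding option in 'mask'; otherwise the two option buffers cannot
 * be walked in lockstep. */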
static int
flower_tun_geneve_opt_check_len(struct tun_metadata *key,
struct tun_metadata *mask)
{
const struct geneve_opt *opt, *opt_mask;
int len, cnt = 0;
len = key->present.len;
while (len) {
opt = &key->opts.gnv[cnt];
opt_mask = &mask->opts.gnv[cnt];
if (opt->length != opt_mask->length) {
VLOG_ERR_RL(&error_rl,
"failed to parse tun options; key/mask length differ");
return EINVAL;
}
cnt += sizeof(struct geneve_opt) / 4 + opt->length;
len -= sizeof(struct geneve_opt) + opt->length * 4;
}
return 0;
}
static int
nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower)
{
int err;
if (attrs[TCA_FLOWER_KEY_ENC_KEY_ID]) {
ovs_be32 id = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_KEY_ID]);
flower->key.tunnel.id = be32_to_be64(id);
flower->mask.tunnel.id = OVS_BE64_MAX;
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) {
flower->mask.tunnel.ipv4.ipv4_src =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]);
flower->key.tunnel.ipv4.ipv4_src =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]) {
flower->mask.tunnel.ipv4.ipv4_dst =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]);
flower->key.tunnel.ipv4.ipv4_dst =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]) {
flower->mask.tunnel.ipv6.ipv6_src =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]);
flower->key.tunnel.ipv6.ipv6_src =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]) {
flower->mask.tunnel.ipv6.ipv6_dst =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]);
flower->key.tunnel.ipv6.ipv6_dst =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST]);
}
if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) {
flower->key.tunnel.tp_dst =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IP_TOS_MASK]) {
flower->key.tunnel.tos =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ENC_IP_TOS]);
flower->mask.tunnel.tos =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ENC_IP_TOS_MASK]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IP_TTL_MASK]) {
flower->key.tunnel.ttl =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ENC_IP_TTL]);
flower->mask.tunnel.ttl =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ENC_IP_TTL_MASK]);
}
if (!is_all_zeros(&flower->mask.tunnel, sizeof flower->mask.tunnel) ||
!is_all_zeros(&flower->key.tunnel, sizeof flower->key.tunnel)) {
flower->tunnel = true;
}
if (attrs[TCA_FLOWER_KEY_ENC_OPTS] &&
attrs[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
err = nl_parse_flower_tunnel_opts(attrs[TCA_FLOWER_KEY_ENC_OPTS],
&flower->key.tunnel.metadata);
if (err) {
return err;
}
err = nl_parse_flower_tunnel_opts(attrs[TCA_FLOWER_KEY_ENC_OPTS_MASK],
&flower->mask.tunnel.metadata);
if (err) {
return err;
}
err = flower_tun_geneve_opt_check_len(&flower->key.tunnel.metadata,
&flower->mask.tunnel.metadata);
if (err) {
return err;
}
} else if (attrs[TCA_FLOWER_KEY_ENC_OPTS]) {
VLOG_ERR_RL(&error_rl,
"failed to parse tun options; no mask supplied");
return EINVAL;
} else if (attrs[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
VLOG_ERR_RL(&error_rl, "failed to parse tun options; no key supplied");
return EINVAL;
}
return 0;
}
static void
nl_parse_flower_ct_match(struct nlattr **attrs, struct tc_flower *flower)
{
struct tc_flower_key *key = &flower->key;
struct tc_flower_key *mask = &flower->mask;
struct nlattr *attr_key, *attr_mask;
attr_key = attrs[TCA_FLOWER_KEY_CT_STATE];
attr_mask = attrs[TCA_FLOWER_KEY_CT_STATE_MASK];
if (attr_mask) {
key->ct_state = nl_attr_get_u16(attr_key);
mask->ct_state = nl_attr_get_u16(attr_mask);
}
attr_key = attrs[TCA_FLOWER_KEY_CT_ZONE];
attr_mask = attrs[TCA_FLOWER_KEY_CT_ZONE_MASK];
    if (attr_mask) {
key->ct_zone = nl_attr_get_u16(attr_key);
mask->ct_zone = nl_attr_get_u16(attr_mask);
}
attr_key = attrs[TCA_FLOWER_KEY_CT_MARK];
attr_mask = attrs[TCA_FLOWER_KEY_CT_MARK_MASK];
    if (attr_mask) {
key->ct_mark = nl_attr_get_u32(attr_key);
mask->ct_mark = nl_attr_get_u32(attr_mask);
}
attr_key = attrs[TCA_FLOWER_KEY_CT_LABELS];
attr_mask = attrs[TCA_FLOWER_KEY_CT_LABELS_MASK];
    if (attr_mask) {
key->ct_label = nl_attr_get_u128(attr_key);
mask->ct_label = nl_attr_get_u128(attr_mask);
}
}
static void
nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower)
{
uint8_t ip_proto = 0;
struct tc_flower_key *key = &flower->key;
struct tc_flower_key *mask = &flower->mask;
if (attrs[TCA_FLOWER_KEY_IP_PROTO]) {
ip_proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]);
key->ip_proto = ip_proto;
mask->ip_proto = UINT8_MAX;
}
if (attrs[TCA_FLOWER_KEY_FLAGS_MASK]) {
key->flags = ntohl(nl_attr_get_be32(attrs[TCA_FLOWER_KEY_FLAGS]));
mask->flags =
ntohl(nl_attr_get_be32(attrs[TCA_FLOWER_KEY_FLAGS_MASK]));
}
if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
key->ipv4.ipv4_src =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]);
mask->ipv4.ipv4_src =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]);
}
if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) {
key->ipv4.ipv4_dst =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST]);
mask->ipv4.ipv4_dst =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]);
}
if (attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]) {
struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_SRC];
struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK];
key->ipv6.ipv6_src = nl_attr_get_in6_addr(attr);
mask->ipv6.ipv6_src = nl_attr_get_in6_addr(attr_mask);
}
if (attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]) {
struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_DST];
struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_DST_MASK];
key->ipv6.ipv6_dst = nl_attr_get_in6_addr(attr);
mask->ipv6.ipv6_dst = nl_attr_get_in6_addr(attr_mask);
}
if (ip_proto == IPPROTO_TCP) {
if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
key->tcp_src =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
mask->tcp_src =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
}
if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
key->tcp_dst =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
mask->tcp_dst =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
}
if (attrs[TCA_FLOWER_KEY_TCP_FLAGS_MASK]) {
key->tcp_flags =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_FLAGS]);
mask->tcp_flags =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_FLAGS_MASK]);
}
} else if (ip_proto == IPPROTO_UDP) {
if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
key->udp_src = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
mask->udp_src =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
}
if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
key->udp_dst = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
mask->udp_dst =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
}
} else if (ip_proto == IPPROTO_SCTP) {
if (attrs[TCA_FLOWER_KEY_SCTP_SRC_MASK]) {
key->sctp_src = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_SCTP_SRC]);
mask->sctp_src =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_SCTP_SRC_MASK]);
}
if (attrs[TCA_FLOWER_KEY_SCTP_DST_MASK]) {
key->sctp_dst = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_SCTP_DST]);
mask->sctp_dst =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_SCTP_DST_MASK]);
}
} else if (ip_proto == IPPROTO_ICMP) {
        if (attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]) {
            key->icmp_code =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]);
            mask->icmp_code =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]);
        }
        if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) {
            key->icmp_type =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]);
            mask->icmp_type =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]);
        }
    } else if (ip_proto == IPPROTO_ICMPV6) {
        if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) {
            key->icmp_code =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]);
            mask->icmp_code =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]);
        }
        if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) {
            key->icmp_type =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]);
            mask->icmp_type =
                nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]);
        }
}
if (attrs[TCA_FLOWER_KEY_IP_TTL_MASK]) {
key->ip_ttl = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_TTL]);
mask->ip_ttl = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_TTL_MASK]);
}
if (attrs[TCA_FLOWER_KEY_IP_TOS_MASK]) {
key->ip_tos = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_TOS]);
mask->ip_tos = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_TOS_MASK]);
}
nl_parse_flower_ct_match(attrs, flower);
}
static enum tc_offloaded_state
nl_get_flower_offloaded_state(struct nlattr **attrs)
{
uint32_t flower_flags = 0;
if (attrs[TCA_FLOWER_FLAGS]) {
flower_flags = nl_attr_get_u32(attrs[TCA_FLOWER_FLAGS]);
if (flower_flags & TCA_CLS_FLAGS_NOT_IN_HW) {
return TC_OFFLOADED_STATE_NOT_IN_HW;
} else if (flower_flags & TCA_CLS_FLAGS_IN_HW) {
return TC_OFFLOADED_STATE_IN_HW;
}
}
return TC_OFFLOADED_STATE_UNDEFINED;
}
static void
nl_parse_flower_flags(struct nlattr **attrs, struct tc_flower *flower)
{
flower->offloaded_state = nl_get_flower_offloaded_state(attrs);
}
static const struct nl_policy pedit_policy[] = {
[TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_pedit),
.optional = false, },
[TCA_PEDIT_KEYS_EX] = { .type = NL_A_NESTED,
.optional = false, },
};
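/* Parses a pedit action back into a TC_ACT_PEDIT flower action.  Each pedit
 * key rewrites a 32-bit word of a packet header; flower_pedit_map translates
 * each (header type, offset) word back into the corresponding field of
 * action->rewrite.key/mask, masking off bytes that fall outside the field. */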
static int
nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
{
struct tc_action *action = &flower->actions[flower->action_count++];
struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
const struct tc_pedit *pe;
const struct tc_pedit_key *keys;
const struct nlattr *nla, *keys_ex, *ex_type;
const void *keys_attr;
char *rewrite_key = (void *) &action->rewrite.key;
char *rewrite_mask = (void *) &action->rewrite.mask;
size_t keys_ex_size, left;
int type, i = 0, err;
if (!nl_parse_nested(options, pedit_policy, pe_attrs,
ARRAY_SIZE(pedit_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse pedit action options");
return EPROTO;
}
pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
keys = pe->keys;
keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
keys_ex = nl_attr_get(keys_attr);
keys_ex_size = nl_attr_get_size(keys_attr);
NL_ATTR_FOR_EACH (nla, left, keys_ex, keys_ex_size) {
if (i >= pe->nkeys) {
break;
}
if (nl_attr_type(nla) != TCA_PEDIT_KEY_EX) {
VLOG_ERR_RL(&error_rl, "unable to parse legacy pedit type: %d",
nl_attr_type(nla));
return EOPNOTSUPP;
}
ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
type = nl_attr_get_u16(ex_type);
err = csum_update_flag(flower, type);
if (err) {
return err;
}
for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
struct flower_key_to_pedit *m = &flower_pedit_map[j];
int flower_off = m->flower_offset;
int sz = m->size;
int mf = m->offset;
int ef = ROUND_UP(mf, 4);
if (m->htype != type) {
continue;
}
            /* Check for overlap between the current pedit key, which always
             * covers 4 bytes (range [off, off + 3]), and this map entry,
             * whose field occupies bytes [mf, mf + sz - 1], rounded out to
             * the containing 4-byte words (ef = ROUND_UP(mf, 4)). */
if ((keys->off >= mf && keys->off < mf + sz)
|| (keys->off + 3 >= mf && keys->off + 3 < ef)) {
int diff = flower_off + (keys->off - mf);
ovs_be32 *dst = (void *) (rewrite_key + diff);
ovs_be32 *dst_m = (void *) (rewrite_mask + diff);
ovs_be32 mask, mask_word, data_word;
uint32_t zero_bits;
mask_word = htonl(ntohl(keys->mask) << m->boundary_shift);
data_word = htonl(ntohl(keys->val) << m->boundary_shift);
mask = ~(mask_word);
if (keys->off < mf) {
zero_bits = 8 * (mf - keys->off);
mask &= htonl(UINT32_MAX >> zero_bits);
} else if (keys->off + 4 > mf + m->size) {
zero_bits = 8 * (keys->off + 4 - mf - m->size);
mask &= htonl(UINT32_MAX << zero_bits);
}
*dst_m |= mask;
*dst |= data_word & mask;
}
}
keys++;
i++;
}
action->type = TC_ACT_PEDIT;
return 0;
}
static const struct nl_policy tunnel_key_policy[] = {
[TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_tunnel_key),
.optional = false, },
[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
[TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NL_A_U32, .optional = true, },
[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct in6_addr),
.optional = true, },
[TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
[TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NL_A_U16, .optional = true, },
[TCA_TUNNEL_KEY_ENC_TOS] = { .type = NL_A_U8, .optional = true, },
[TCA_TUNNEL_KEY_ENC_TTL] = { .type = NL_A_U8, .optional = true, },
[TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NL_A_NESTED, .optional = true, },
[TCA_TUNNEL_KEY_NO_CSUM] = { .type = NL_A_U8, .optional = true, },
};
static int
nl_parse_act_geneve_opts(const struct nlattr *in_nlattr,
struct tc_action *action)
{
struct geneve_opt *opt = NULL;
const struct ofpbuf *msg;
uint16_t last_opt_type;
struct nlattr *nla;
struct ofpbuf buf;
size_t left;
int cnt;
nl_attr_get_nested(in_nlattr, &buf);
msg = &buf;
last_opt_type = TCA_TUNNEL_KEY_ENC_OPT_GENEVE_UNSPEC;
cnt = 0;
NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(msg, 0, 0), msg->size) {
uint16_t type = nl_attr_type(nla);
switch (type) {
case TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS:
if (cnt && last_opt_type != TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA) {
VLOG_ERR_RL(&error_rl,
"failed to parse action geneve options class");
return EINVAL;
}
opt = &action->encap.data.opts.gnv[cnt];
opt->opt_class = nl_attr_get_be16(nla);
cnt += sizeof(struct geneve_opt) / 4;
action->encap.data.present.len += sizeof(struct geneve_opt);
last_opt_type = TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS;
break;
case TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE:
if (last_opt_type != TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS) {
VLOG_ERR_RL(&error_rl,
"failed to parse action geneve options type");
return EINVAL;
}
opt->type = nl_attr_get_u8(nla);
last_opt_type = TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE;
break;
case TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA:
if (last_opt_type != TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE) {
VLOG_ERR_RL(&error_rl,
"failed to parse action geneve options data");
return EINVAL;
}
opt->length = nl_attr_get_size(nla) / 4;
memcpy(opt + 1, nl_attr_get_unspec(nla, 1), opt->length * 4);
cnt += opt->length;
action->encap.data.present.len += opt->length * 4;
last_opt_type = TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA;
break;
}
}
if (last_opt_type != TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA) {
VLOG_ERR_RL(&error_rl,
"failed to parse action geneve options without data");
return EINVAL;
}
return 0;
}
static int
nl_parse_act_tunnel_opts(struct nlattr *options, struct tc_action *action)
{
const struct ofpbuf *msg;
struct nlattr *nla;
struct ofpbuf buf;
size_t left;
int err;
if (!options) {
return 0;
}
nl_attr_get_nested(options, &buf);
msg = &buf;
NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(msg, 0, 0), msg->size) {
uint16_t type = nl_attr_type(nla);
switch (type) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
err = nl_parse_act_geneve_opts(nla, action);
if (err) {
return err;
}
break;
}
}
return 0;
}
static int
nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *tun_attrs[ARRAY_SIZE(tunnel_key_policy)];
const struct nlattr *tun_parms;
const struct tc_tunnel_key *tun;
struct tc_action *action;
int err;
if (!nl_parse_nested(options, tunnel_key_policy, tun_attrs,
ARRAY_SIZE(tunnel_key_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse tunnel_key action options");
return EPROTO;
}
tun_parms = tun_attrs[TCA_TUNNEL_KEY_PARMS];
tun = nl_attr_get_unspec(tun_parms, sizeof *tun);
if (tun->t_action == TCA_TUNNEL_KEY_ACT_SET) {
struct nlattr *id = tun_attrs[TCA_TUNNEL_KEY_ENC_KEY_ID];
struct nlattr *dst_port = tun_attrs[TCA_TUNNEL_KEY_ENC_DST_PORT];
struct nlattr *ipv4_src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_SRC];
struct nlattr *ipv4_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_DST];
struct nlattr *ipv6_src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_SRC];
struct nlattr *ipv6_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_DST];
struct nlattr *tos = tun_attrs[TCA_TUNNEL_KEY_ENC_TOS];
struct nlattr *ttl = tun_attrs[TCA_TUNNEL_KEY_ENC_TTL];
struct nlattr *tun_opt = tun_attrs[TCA_TUNNEL_KEY_ENC_OPTS];
struct nlattr *no_csum = tun_attrs[TCA_TUNNEL_KEY_NO_CSUM];
action = &flower->actions[flower->action_count++];
action->type = TC_ACT_ENCAP;
action->encap.ipv4.ipv4_src = ipv4_src ? nl_attr_get_be32(ipv4_src) : 0;
action->encap.ipv4.ipv4_dst = ipv4_dst ? nl_attr_get_be32(ipv4_dst) : 0;
if (ipv6_src) {
action->encap.ipv6.ipv6_src = nl_attr_get_in6_addr(ipv6_src);
}
if (ipv6_dst) {
action->encap.ipv6.ipv6_dst = nl_attr_get_in6_addr(ipv6_dst);
}
action->encap.id = id ? be32_to_be64(nl_attr_get_be32(id)) : 0;
action->encap.id_present = id ? true : false;
action->encap.tp_dst = dst_port ? nl_attr_get_be16(dst_port) : 0;
action->encap.tos = tos ? nl_attr_get_u8(tos) : 0;
action->encap.ttl = ttl ? nl_attr_get_u8(ttl) : 0;
action->encap.no_csum = no_csum ? nl_attr_get_u8(no_csum) : 0;
err = nl_parse_act_tunnel_opts(tun_opt, action);
if (err) {
return err;
}
} else if (tun->t_action == TCA_TUNNEL_KEY_ACT_RELEASE) {
flower->tunnel = true;
} else {
VLOG_ERR_RL(&error_rl, "unknown tunnel actions: %d, %d",
tun->action, tun->t_action);
return EINVAL;
}
return 0;
}
static const struct nl_policy gact_policy[] = {
[TCA_GACT_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_gact),
.optional = false, },
[TCA_GACT_TM] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tcf_t),
.optional = false, },
};
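/* Returns the kernel's USER_HZ: tc reports time deltas, such as the
 * 'lastuse' timestamp converted in nl_parse_tcf() below, in USER_HZ clock
 * ticks. */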
static int
get_user_hz(void)
{
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
static int user_hz = 100;
if (ovsthread_once_start(&once)) {
user_hz = sysconf(_SC_CLK_TCK);
ovsthread_once_done(&once);
}
return user_hz;
}
static void
nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
{
flower->lastused = time_msec() - (tm->lastuse * 1000 / get_user_hz());
}
static int
nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
const struct tc_gact *p;
struct nlattr *gact_parms;
const struct tcf_t *tm;
struct tc_action *action;
if (!nl_parse_nested(options, gact_policy, gact_attrs,
ARRAY_SIZE(gact_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse gact action options");
return EPROTO;
}
gact_parms = gact_attrs[TCA_GACT_PARMS];
p = nl_attr_get_unspec(gact_parms, sizeof *p);
if (TC_ACT_EXT_CMP(p->action, TC_ACT_GOTO_CHAIN)) {
action = &flower->actions[flower->action_count++];
action->chain = p->action & TC_ACT_EXT_VAL_MASK;
action->type = TC_ACT_GOTO;
} else if (p->action != TC_ACT_SHOT) {
VLOG_ERR_RL(&error_rl, "unknown gact action: %d", p->action);
return EINVAL;
}
tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm);
nl_parse_tcf(tm, flower);
return 0;
}
static const struct nl_policy mirred_policy[] = {
[TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_mirred),
.optional = false, },
[TCA_MIRRED_TM] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tcf_t),
.optional = false, },
};
static int
nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)];
const struct tc_mirred *m;
const struct nlattr *mirred_parms;
const struct tcf_t *tm;
struct nlattr *mirred_tm;
struct tc_action *action;
if (!nl_parse_nested(options, mirred_policy, mirred_attrs,
ARRAY_SIZE(mirred_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse mirred action options");
return EPROTO;
}
mirred_parms = mirred_attrs[TCA_MIRRED_PARMS];
m = nl_attr_get_unspec(mirred_parms, sizeof *m);
if (m->eaction != TCA_EGRESS_REDIR && m->eaction != TCA_EGRESS_MIRROR &&
m->eaction != TCA_INGRESS_REDIR && m->eaction != TCA_INGRESS_MIRROR) {
VLOG_ERR_RL(&error_rl, "unknown mirred action: %d, %d, %d",
m->action, m->eaction, m->ifindex);
return EINVAL;
}
action = &flower->actions[flower->action_count++];
action->out.ifindex_out = m->ifindex;
if (m->eaction == TCA_INGRESS_REDIR || m->eaction == TCA_INGRESS_MIRROR) {
action->out.ingress = true;
} else {
action->out.ingress = false;
}
action->type = TC_ACT_OUTPUT;
mirred_tm = mirred_attrs[TCA_MIRRED_TM];
tm = nl_attr_get_unspec(mirred_tm, sizeof *tm);
nl_parse_tcf(tm, flower);
return 0;
}
static const struct nl_policy ct_policy[] = {
[TCA_CT_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_ct),
.optional = false, },
[TCA_CT_ACTION] = { .type = NL_A_U16,
.optional = true, },
[TCA_CT_ZONE] = { .type = NL_A_U16,
.optional = true, },
[TCA_CT_MARK] = { .type = NL_A_U32,
.optional = true, },
[TCA_CT_MARK_MASK] = { .type = NL_A_U32,
.optional = true, },
[TCA_CT_LABELS] = { .type = NL_A_UNSPEC,
.optional = true, },
[TCA_CT_LABELS_MASK] = { .type = NL_A_UNSPEC,
.optional = true, },
[TCA_CT_NAT_IPV4_MIN] = { .type = NL_A_U32,
.optional = true, },
[TCA_CT_NAT_IPV4_MAX] = { .type = NL_A_U32,
.optional = true, },
[TCA_CT_NAT_IPV6_MIN] = { .min_len = sizeof(struct in6_addr),
.type = NL_A_UNSPEC,
.optional = true },
[TCA_CT_NAT_IPV6_MAX] = { .min_len = sizeof(struct in6_addr),
.type = NL_A_UNSPEC,
.optional = true },
[TCA_CT_NAT_PORT_MIN] = { .type = NL_A_U16,
.optional = true, },
[TCA_CT_NAT_PORT_MAX] = { .type = NL_A_U16,
.optional = true, },
};
static int
nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *ct_attrs[ARRAY_SIZE(ct_policy)];
const struct nlattr *ct_parms;
struct tc_action *action;
const struct tc_ct *ct;
uint16_t ct_action = 0;
if (!nl_parse_nested(options, ct_policy, ct_attrs,
ARRAY_SIZE(ct_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse ct action options");
return EPROTO;
}
ct_parms = ct_attrs[TCA_CT_PARMS];
ct = nl_attr_get_unspec(ct_parms, sizeof *ct);
if (ct_attrs[TCA_CT_ACTION]) {
ct_action = nl_attr_get_u16(ct_attrs[TCA_CT_ACTION]);
}
action = &flower->actions[flower->action_count++];
action->ct.clear = ct_action & TCA_CT_ACT_CLEAR;
if (!action->ct.clear) {
struct nlattr *zone = ct_attrs[TCA_CT_ZONE];
struct nlattr *mark = ct_attrs[TCA_CT_MARK];
struct nlattr *mark_mask = ct_attrs[TCA_CT_MARK_MASK];
struct nlattr *label = ct_attrs[TCA_CT_LABELS];
struct nlattr *label_mask = ct_attrs[TCA_CT_LABELS_MASK];
action->ct.commit = ct_action & TCA_CT_ACT_COMMIT;
action->ct.force = ct_action & TCA_CT_ACT_FORCE;
action->ct.zone = zone ? nl_attr_get_u16(zone) : 0;
action->ct.mark = mark ? nl_attr_get_u32(mark) : 0;
action->ct.mark_mask = mark_mask ? nl_attr_get_u32(mark_mask) : 0;
        action->ct.label = label ? nl_attr_get_u128(label) : OVS_U128_ZERO;
action->ct.label_mask = label_mask ?
nl_attr_get_u128(label_mask) : OVS_U128_ZERO;
if (ct_action & TCA_CT_ACT_NAT) {
struct nlattr *ipv4_min = ct_attrs[TCA_CT_NAT_IPV4_MIN];
struct nlattr *ipv4_max = ct_attrs[TCA_CT_NAT_IPV4_MAX];
struct nlattr *ipv6_min = ct_attrs[TCA_CT_NAT_IPV6_MIN];
struct nlattr *ipv6_max = ct_attrs[TCA_CT_NAT_IPV6_MAX];
struct nlattr *port_min = ct_attrs[TCA_CT_NAT_PORT_MIN];
struct nlattr *port_max = ct_attrs[TCA_CT_NAT_PORT_MAX];
action->ct.nat_type = TC_NAT_RESTORE;
if (ct_action & TCA_CT_ACT_NAT_SRC) {
action->ct.nat_type = TC_NAT_SRC;
} else if (ct_action & TCA_CT_ACT_NAT_DST) {
action->ct.nat_type = TC_NAT_DST;
}
if (ipv4_min) {
action->ct.range.ip_family = AF_INET;
action->ct.range.ipv4.min = nl_attr_get_be32(ipv4_min);
if (ipv4_max) {
ovs_be32 addr = nl_attr_get_be32(ipv4_max);
action->ct.range.ipv4.max = addr;
}
} else if (ipv6_min) {
action->ct.range.ip_family = AF_INET6;
action->ct.range.ipv6.min
= nl_attr_get_in6_addr(ipv6_min);
if (ipv6_max) {
struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max);
action->ct.range.ipv6.max = addr;
}
}
if (port_min) {
action->ct.range.port.min = nl_attr_get_be16(port_min);
if (port_max) {
action->ct.range.port.max = nl_attr_get_be16(port_max);
}
}
}
}
action->type = TC_ACT_CT;
return 0;
}
static const struct nl_policy vlan_policy[] = {
[TCA_VLAN_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_vlan),
.optional = false, },
[TCA_VLAN_PUSH_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
[TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NL_A_U16, .optional = true, },
[TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NL_A_U8, .optional = true, },
};
static int
nl_parse_act_vlan(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *vlan_attrs[ARRAY_SIZE(vlan_policy)];
const struct tc_vlan *v;
const struct nlattr *vlan_parms;
struct tc_action *action;
if (!nl_parse_nested(options, vlan_policy, vlan_attrs,
ARRAY_SIZE(vlan_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse vlan action options");
return EPROTO;
}
action = &flower->actions[flower->action_count++];
vlan_parms = vlan_attrs[TCA_VLAN_PARMS];
v = nl_attr_get_unspec(vlan_parms, sizeof *v);
if (v->v_action == TCA_VLAN_ACT_PUSH) {
struct nlattr *vlan_tpid = vlan_attrs[TCA_VLAN_PUSH_VLAN_PROTOCOL];
struct nlattr *vlan_id = vlan_attrs[TCA_VLAN_PUSH_VLAN_ID];
struct nlattr *vlan_prio = vlan_attrs[TCA_VLAN_PUSH_VLAN_PRIORITY];
action->vlan.vlan_push_tpid = nl_attr_get_be16(vlan_tpid);
action->vlan.vlan_push_id = nl_attr_get_u16(vlan_id);
action->vlan.vlan_push_prio = vlan_prio ? nl_attr_get_u8(vlan_prio) : 0;
action->type = TC_ACT_VLAN_PUSH;
} else if (v->v_action == TCA_VLAN_ACT_POP) {
action->type = TC_ACT_VLAN_POP;
} else {
VLOG_ERR_RL(&error_rl, "unknown vlan action: %d, %d",
v->action, v->v_action);
return EINVAL;
}
return 0;
}
static const struct nl_policy mpls_policy[] = {
[TCA_MPLS_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_mpls),
.optional = false, },
[TCA_MPLS_PROTO] = { .type = NL_A_U16, .optional = true, },
[TCA_MPLS_LABEL] = { .type = NL_A_U32, .optional = true, },
[TCA_MPLS_TC] = { .type = NL_A_U8, .optional = true, },
[TCA_MPLS_TTL] = { .type = NL_A_U8, .optional = true, },
[TCA_MPLS_BOS] = { .type = NL_A_U8, .optional = true, },
};
static int
nl_parse_act_mpls(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *mpls_attrs[ARRAY_SIZE(mpls_policy)];
const struct nlattr *mpls_parms;
struct nlattr *mpls_proto;
struct nlattr *mpls_label;
struct tc_action *action;
const struct tc_mpls *m;
struct nlattr *mpls_ttl;
struct nlattr *mpls_bos;
struct nlattr *mpls_tc;
if (!nl_parse_nested(options, mpls_policy, mpls_attrs,
ARRAY_SIZE(mpls_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse mpls action options");
return EPROTO;
}
action = &flower->actions[flower->action_count++];
mpls_parms = mpls_attrs[TCA_MPLS_PARMS];
m = nl_attr_get_unspec(mpls_parms, sizeof *m);
switch (m->m_action) {
case TCA_MPLS_ACT_POP:
mpls_proto = mpls_attrs[TCA_MPLS_PROTO];
if (mpls_proto) {
action->mpls.proto = nl_attr_get_be16(mpls_proto);
}
action->type = TC_ACT_MPLS_POP;
break;
case TCA_MPLS_ACT_PUSH:
mpls_proto = mpls_attrs[TCA_MPLS_PROTO];
if (mpls_proto) {
action->mpls.proto = nl_attr_get_be16(mpls_proto);
}
mpls_label = mpls_attrs[TCA_MPLS_LABEL];
if (mpls_label) {
action->mpls.label = nl_attr_get_u32(mpls_label);
}
mpls_tc = mpls_attrs[TCA_MPLS_TC];
if (mpls_tc) {
action->mpls.tc = nl_attr_get_u8(mpls_tc);
}
mpls_ttl = mpls_attrs[TCA_MPLS_TTL];
if (mpls_ttl) {
action->mpls.ttl = nl_attr_get_u8(mpls_ttl);
}
mpls_bos = mpls_attrs[TCA_MPLS_BOS];
if (mpls_bos) {
action->mpls.bos = nl_attr_get_u8(mpls_bos);
}
action->type = TC_ACT_MPLS_PUSH;
break;
case TCA_MPLS_ACT_MODIFY:
mpls_label = mpls_attrs[TCA_MPLS_LABEL];
if (mpls_label) {
action->mpls.label = nl_attr_get_u32(mpls_label);
}
mpls_tc = mpls_attrs[TCA_MPLS_TC];
if (mpls_tc) {
action->mpls.tc = nl_attr_get_u8(mpls_tc);
}
mpls_ttl = mpls_attrs[TCA_MPLS_TTL];
if (mpls_ttl) {
action->mpls.ttl = nl_attr_get_u8(mpls_ttl);
}
mpls_bos = mpls_attrs[TCA_MPLS_BOS];
if (mpls_bos) {
action->mpls.bos = nl_attr_get_u8(mpls_bos);
}
action->type = TC_ACT_MPLS_SET;
break;
default:
VLOG_ERR_RL(&error_rl, "unknown mpls action: %d, %d",
m->action, m->m_action);
return EINVAL;
}
return 0;
}
static const struct nl_policy csum_policy[] = {
[TCA_CSUM_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_csum),
.optional = false, },
};
static int
nl_parse_act_csum(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *csum_attrs[ARRAY_SIZE(csum_policy)];
const struct tc_csum *c;
const struct nlattr *csum_parms;
if (!nl_parse_nested(options, csum_policy, csum_attrs,
ARRAY_SIZE(csum_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse csum action options");
return EPROTO;
}
csum_parms = csum_attrs[TCA_CSUM_PARMS];
c = nl_attr_get_unspec(csum_parms, sizeof *c);
/* sanity checks */
if (c->update_flags != flower->csum_update_flags) {
VLOG_WARN_RL(&error_rl,
"expected different act csum flags: 0x%x != 0x%x",
flower->csum_update_flags, c->update_flags);
return EINVAL;
}
flower->csum_update_flags = 0; /* so we know csum was handled */
if (flower->needs_full_ip_proto_mask
&& flower->mask.ip_proto != UINT8_MAX) {
VLOG_WARN_RL(&error_rl, "expected full matching on flower ip_proto");
return EINVAL;
}
return 0;
}
static const struct nl_policy act_policy[] = {
[TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false, },
[TCA_ACT_COOKIE] = { .type = NL_A_UNSPEC, .optional = true, },
[TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = true, },
[TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = false, },
};
static const struct nl_policy stats_policy[] = {
[TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct gnet_stats_basic),
.optional = false, },
[TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct gnet_stats_basic),
.optional = true, },
};
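/* Parses one action from a flower dump, dispatching on the action kind.
 * Action statistics are folded into the flower's software and hardware
 * counters: because some actions see more packets than later ones in the
 * chain (e.g. act_ct consumes fragments and passes on only assembled
 * packets), the maximum across all actions is kept for each counter set. */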
static int
nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
bool terse)
{
struct nlattr *act_options;
struct nlattr *act_stats;
struct nlattr *act_cookie;
const char *act_kind;
struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
struct ovs_flow_stats *stats_sw = &flower->stats_sw;
struct ovs_flow_stats *stats_hw = &flower->stats_hw;
const struct gnet_stats_basic *bs_all = NULL;
const struct gnet_stats_basic *bs_hw = NULL;
struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, };
int err = 0;
if (!nl_parse_nested(action, act_policy, action_attrs,
ARRAY_SIZE(act_policy)) ||
(!terse && !action_attrs[TCA_ACT_OPTIONS])) {
VLOG_ERR_RL(&error_rl, "failed to parse single action options");
return EPROTO;
}
act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
act_options = action_attrs[TCA_ACT_OPTIONS];
act_cookie = action_attrs[TCA_ACT_COOKIE];
if (terse) {
/* Terse dump doesn't provide act options attribute. */
} else if (!strcmp(act_kind, "gact")) {
err = nl_parse_act_gact(act_options, flower);
} else if (!strcmp(act_kind, "mirred")) {
err = nl_parse_act_mirred(act_options, flower);
} else if (!strcmp(act_kind, "vlan")) {
err = nl_parse_act_vlan(act_options, flower);
} else if (!strcmp(act_kind, "mpls")) {
err = nl_parse_act_mpls(act_options, flower);
} else if (!strcmp(act_kind, "tunnel_key")) {
err = nl_parse_act_tunnel_key(act_options, flower);
} else if (!strcmp(act_kind, "pedit")) {
err = nl_parse_act_pedit(act_options, flower);
} else if (!strcmp(act_kind, "csum")) {
        err = nl_parse_act_csum(act_options, flower);
} else if (!strcmp(act_kind, "skbedit")) {
/* Added for TC rule only (not in OvS rule) so ignore. */
} else if (!strcmp(act_kind, "ct")) {
        err = nl_parse_act_ct(act_options, flower);
} else {
VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
err = EINVAL;
}
if (err) {
return err;
}
if (act_cookie) {
flower->act_cookie.data = nl_attr_get(act_cookie);
flower->act_cookie.len = nl_attr_get_size(act_cookie);
}
act_stats = action_attrs[TCA_ACT_STATS];
if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
ARRAY_SIZE(stats_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse action stats policy");
return EPROTO;
}
bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all);
if (stats_attrs[TCA_STATS_BASIC_HW]) {
bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW],
sizeof *bs_hw);
bs_sw.packets = bs_all->packets - bs_hw->packets;
bs_sw.bytes = bs_all->bytes - bs_hw->bytes;
} else {
bs_sw.packets = bs_all->packets;
bs_sw.bytes = bs_all->bytes;
}
if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) {
put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets);
put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes);
}
if (bs_hw && bs_hw->packets > get_32aligned_u64(&stats_hw->n_packets)) {
put_32aligned_u64(&stats_hw->n_packets, bs_hw->packets);
put_32aligned_u64(&stats_hw->n_bytes, bs_hw->bytes);
}
return 0;
}
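/* Action attributes nested inside TCA_FLOWER_ACT use the action's order
 * (priority) as the attribute type, and orders start at 1, not 0; hence
 * the loops below begin at TCA_ACT_MIN_PRIO and index 0 stays unused. */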
#define TCA_ACT_MIN_PRIO 1
static int
nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower,
bool terse)
{
const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
static struct nl_policy actions_orders_policy[TCA_ACT_MAX_NUM + 1] = {};
struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
const int max_size = ARRAY_SIZE(actions_orders_policy);
for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) {
actions_orders_policy[i].type = NL_A_NESTED;
actions_orders_policy[i].optional = true;
}
if (!nl_parse_nested(actions, actions_orders_policy, actions_orders,
ARRAY_SIZE(actions_orders_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse flower order of actions");
return EPROTO;
}
for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) {
if (actions_orders[i]) {
int err;
if (flower->action_count >= TCA_ACT_MAX_NUM) {
VLOG_DBG_RL(&error_rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
return EOPNOTSUPP;
}
err = nl_parse_single_action(actions_orders[i], flower, terse);
if (err) {
return err;
}
}
}
if (flower->csum_update_flags) {
VLOG_WARN_RL(&error_rl,
"expected act csum with flags: 0x%x",
flower->csum_update_flags);
return EINVAL;
}
return 0;
}
static int
nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower,
bool terse)
{
struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
int err;
if (terse) {
if (!nl_parse_nested(nl_options, tca_flower_terse_policy,
attrs, ARRAY_SIZE(tca_flower_terse_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse flower classifier terse options");
return EPROTO;
}
goto skip_flower_opts;
}
if (!nl_parse_nested(nl_options, tca_flower_policy,
attrs, ARRAY_SIZE(tca_flower_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse flower classifier options");
return EPROTO;
}
nl_parse_flower_eth(attrs, flower);
nl_parse_flower_arp(attrs, flower);
nl_parse_flower_mpls(attrs, flower);
nl_parse_flower_vlan(attrs, flower);
nl_parse_flower_ip(attrs, flower);
err = nl_parse_flower_tunnel(attrs, flower);
if (err) {
return err;
}
skip_flower_opts:
nl_parse_flower_flags(attrs, flower);
return nl_parse_flower_actions(attrs, flower, terse);
}
int
parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
struct tc_flower *flower, bool terse)
{
struct tcmsg *tc;
struct nlattr *ta[ARRAY_SIZE(tca_policy)];
const char *kind;
if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
return EPROTO;
}
memset(flower, 0, sizeof *flower);
tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
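/* tcm_info packs the filter priority into its upper 16 bits and the
 * matched protocol (ethertype) into its lower 16 bits; tc_get_major()
 * and tc_get_minor() extract those two halves. */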
flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
flower->mask.eth_type = OVS_BE16_MAX;
id->prio = tc_get_major(tc->tcm_info);
id->handle = tc->tcm_handle;
if (id->prio == TC_RESERVED_PRIORITY_POLICE) {
return 0;
}
if (!id->handle) {
return EAGAIN;
}
if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
tca_policy, ta, ARRAY_SIZE(ta))) {
VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
return EPROTO;
}
if (ta[TCA_CHAIN]) {
id->chain = nl_attr_get_u32(ta[TCA_CHAIN]);
}
kind = nl_attr_get_string(ta[TCA_KIND]);
if (strcmp(kind, "flower")) {
VLOG_DBG_ONCE("Unsupported filter: %s", kind);
return EPROTO;
}
return nl_parse_flower_options(ta[TCA_OPTIONS], flower, terse);
}
int
parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
{
struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
struct tcmsg *tc;
tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
tca_chain_policy, ta, ARRAY_SIZE(ta))) {
VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
return EINVAL;
}
*chain = nl_attr_get_u32(ta[TCA_CHAIN]);
return 0;
}
int
tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse)
{
struct ofpbuf request;
request_from_tcf_id(id, 0, RTM_GETTFILTER, NLM_F_DUMP, &request);
if (terse) {
struct nla_bitfield32 dump_flags = { TCA_DUMP_FLAGS_TERSE,
TCA_DUMP_FLAGS_TERSE };
nl_msg_put_unspec(&request, TCA_DUMP_FLAGS, &dump_flags,
sizeof dump_flags);
}
nl_dump_start(dump, NETLINK_ROUTE, &request);
ofpbuf_uninit(&request);
return 0;
}
int
tc_dump_tc_chain_start(struct tcf_id *id, struct nl_dump *dump)
{
struct ofpbuf request;
request_from_tcf_id(id, 0, RTM_GETCHAIN, NLM_F_DUMP, &request);
nl_dump_start(dump, NETLINK_ROUTE, &request);
ofpbuf_uninit(&request);
return 0;
}
int
tc_del_filter(struct tcf_id *id)
{
struct ofpbuf request;
request_from_tcf_id(id, 0, RTM_DELTFILTER, NLM_F_ACK, &request);
return tc_transact(&request, NULL);
}
int
tc_get_flower(struct tcf_id *id, struct tc_flower *flower)
{
struct ofpbuf request;
struct ofpbuf *reply;
int error;
request_from_tcf_id(id, 0, RTM_GETTFILTER, NLM_F_ECHO, &request);
error = tc_transact(&request, &reply);
if (error) {
return error;
}
error = parse_netlink_to_tc_flower(reply, id, flower, false);
ofpbuf_delete(reply);
return error;
}
static int
tc_get_tc_cls_policy(enum tc_offload_policy policy)
{
if (policy == TC_POLICY_SKIP_HW) {
return TCA_CLS_FLAGS_SKIP_HW;
} else if (policy == TC_POLICY_SKIP_SW) {
return TCA_CLS_FLAGS_SKIP_SW;
}
return 0;
}
static void
nl_msg_put_act_csum(struct ofpbuf *request, uint32_t flags)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "csum");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_csum parm = { .action = TC_ACT_PIPE,
.update_flags = flags };
nl_msg_put_unspec(request, TCA_CSUM_PARMS, &parm, sizeof parm);
}
nl_msg_end_nested(request, offset);
}
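/* Serializes a pedit action.  The resulting attribute layout is roughly:
 *
 *   TCA_ACT_KIND    "pedit"
 *   TCA_ACT_OPTIONS
 *     TCA_PEDIT_PARMS_EX  struct tc_pedit followed by nkeys
 *                         struct tc_pedit_key entries
 *     TCA_PEDIT_KEYS_EX
 *       TCA_PEDIT_KEY_EX  { HTYPE, CMD }, one per key, in the same order
 */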
static void
nl_msg_put_act_pedit(struct ofpbuf *request, struct tc_pedit *parm,
struct tc_pedit_key_ex *ex)
{
size_t ksize = sizeof *parm + parm->nkeys * sizeof(struct tc_pedit_key);
size_t offset, offset_keys_ex, offset_key;
int i;
nl_msg_put_string(request, TCA_ACT_KIND, "pedit");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
parm->action = TC_ACT_PIPE;
nl_msg_put_unspec(request, TCA_PEDIT_PARMS_EX, parm, ksize);
offset_keys_ex = nl_msg_start_nested(request, TCA_PEDIT_KEYS_EX);
for (i = 0; i < parm->nkeys; i++, ex++) {
offset_key = nl_msg_start_nested(request, TCA_PEDIT_KEY_EX);
nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_HTYPE, ex->htype);
nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_CMD, ex->cmd);
nl_msg_end_nested(request, offset_key);
}
nl_msg_end_nested(request, offset_keys_ex);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_push_vlan(struct ofpbuf *request, ovs_be16 tpid,
uint16_t vid, uint8_t prio)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_vlan parm = { .action = TC_ACT_PIPE,
.v_action = TCA_VLAN_ACT_PUSH };
nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
nl_msg_put_be16(request, TCA_VLAN_PUSH_VLAN_PROTOCOL, tpid);
nl_msg_put_u16(request, TCA_VLAN_PUSH_VLAN_ID, vid);
nl_msg_put_u8(request, TCA_VLAN_PUSH_VLAN_PRIORITY, prio);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_pop_vlan(struct ofpbuf *request)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_vlan parm = { .action = TC_ACT_PIPE,
.v_action = TCA_VLAN_ACT_POP };
nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_pop_mpls(struct ofpbuf *request, ovs_be16 proto)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "mpls");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
{
struct tc_mpls parm = { .action = TC_ACT_PIPE,
.m_action = TCA_MPLS_ACT_POP };
nl_msg_put_unspec(request, TCA_MPLS_PARMS, &parm, sizeof parm);
nl_msg_put_be16(request, TCA_MPLS_PROTO, proto);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_push_mpls(struct ofpbuf *request, ovs_be16 proto,
uint32_t label, uint8_t tc, uint8_t ttl, uint8_t bos)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "mpls");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
{
struct tc_mpls parm = { .action = TC_ACT_PIPE,
.m_action = TCA_MPLS_ACT_PUSH };
nl_msg_put_unspec(request, TCA_MPLS_PARMS, &parm, sizeof parm);
nl_msg_put_be16(request, TCA_MPLS_PROTO, proto);
nl_msg_put_u32(request, TCA_MPLS_LABEL, label);
nl_msg_put_u8(request, TCA_MPLS_TC, tc);
nl_msg_put_u8(request, TCA_MPLS_TTL, ttl);
nl_msg_put_u8(request, TCA_MPLS_BOS, bos);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_set_mpls(struct ofpbuf *request, uint32_t label, uint8_t tc,
uint8_t ttl, uint8_t bos)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "mpls");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
{
struct tc_mpls parm = { .action = TC_ACT_PIPE,
.m_action = TCA_MPLS_ACT_MODIFY };
nl_msg_put_unspec(request, TCA_MPLS_PARMS, &parm, sizeof parm);
nl_msg_put_u32(request, TCA_MPLS_LABEL, label);
nl_msg_put_u8(request, TCA_MPLS_TC, tc);
nl_msg_put_u8(request, TCA_MPLS_TTL, ttl);
nl_msg_put_u8(request, TCA_MPLS_BOS, bos);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_tunnel_key_release(struct ofpbuf *request)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_tunnel_key tun = { .action = TC_ACT_PIPE,
.t_action = TCA_TUNNEL_KEY_ACT_RELEASE };
nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
}
nl_msg_end_nested(request, offset);
}
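/* Serializes the Geneve options in 'tun_metadata' as nested
 * TCA_TUNNEL_KEY_ENC_OPTS attributes.  Note that 'opt->length' counts
 * 4-byte words of option data: e.g. an option carrying 4 data bytes
 * advances 'cnt' by 2 words (one word of struct geneve_opt header plus
 * one word of data) and consumes 8 bytes of 'len'. */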
static void
nl_msg_put_act_tunnel_geneve_option(struct ofpbuf *request,
struct tun_metadata tun_metadata)
{
const struct geneve_opt *opt;
size_t outer, inner;
int len, cnt = 0;
len = tun_metadata.present.len;
if (!len) {
return;
}
outer = nl_msg_start_nested(request, TCA_TUNNEL_KEY_ENC_OPTS);
while (len) {
opt = &tun_metadata.opts.gnv[cnt];
inner = nl_msg_start_nested(request, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
opt->opt_class);
nl_msg_put_u8(request, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE, opt->type);
nl_msg_put_unspec(request, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA, opt + 1,
opt->length * 4);
cnt += sizeof(struct geneve_opt) / 4 + opt->length;
len -= sizeof(struct geneve_opt) + opt->length * 4;
nl_msg_end_nested(request, inner);
}
nl_msg_end_nested(request, outer);
}
static void
nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, bool id_present,
ovs_be64 id, ovs_be32 ipv4_src,
ovs_be32 ipv4_dst, struct in6_addr *ipv6_src,
struct in6_addr *ipv6_dst,
ovs_be16 tp_dst, uint8_t tos, uint8_t ttl,
struct tun_metadata tun_metadata,
uint8_t no_csum)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_tunnel_key tun = { .action = TC_ACT_PIPE,
.t_action = TCA_TUNNEL_KEY_ACT_SET };
nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
ovs_be32 id32 = be64_to_be32(id);
if (id_present) {
nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32);
}
if (ipv4_dst) {
nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
} else if (ipv6_addr_is_set(ipv6_dst)) {
nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST,
ipv6_dst);
nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC,
ipv6_src);
}
if (tos) {
nl_msg_put_u8(request, TCA_TUNNEL_KEY_ENC_TOS, tos);
}
if (ttl) {
nl_msg_put_u8(request, TCA_TUNNEL_KEY_ENC_TTL, ttl);
}
if (tp_dst) {
nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_DST_PORT, tp_dst);
}
nl_msg_put_act_tunnel_geneve_option(request, tun_metadata);
nl_msg_put_u8(request, TCA_TUNNEL_KEY_NO_CSUM, no_csum);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_gact(struct ofpbuf *request, uint32_t chain)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "gact");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_gact p = { .action = TC_ACT_SHOT };
if (chain) {
p.action = TC_ACT_GOTO_CHAIN | chain;
}
nl_msg_put_unspec(request, TCA_GACT_PARMS, &p, sizeof p);
}
nl_msg_end_nested(request, offset);
}
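/* For illustration (a sketch), an OvS ct(commit,nat(src=10.0.0.1))
 * action is serialized as TCA_CT_ACTION = TCA_CT_ACT_COMMIT |
 * TCA_CT_ACT_NAT | TCA_CT_ACT_NAT_SRC plus TCA_CT_NAT_IPV4_MIN set to
 * 10.0.0.1 (the MAX attribute is omitted for a single-address range),
 * while ct_clear maps to TCA_CT_ACT_CLEAR alone. */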
static void
nl_msg_put_act_ct(struct ofpbuf *request, struct tc_action *action)
{
uint16_t ct_action = 0;
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "ct");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
{
struct tc_ct ct = {
.action = TC_ACT_PIPE,
};
if (!action->ct.clear) {
if (action->ct.zone) {
nl_msg_put_u16(request, TCA_CT_ZONE, action->ct.zone);
}
if (!is_all_zeros(&action->ct.label_mask,
sizeof action->ct.label_mask)) {
nl_msg_put_u128(request, TCA_CT_LABELS,
action->ct.label);
nl_msg_put_u128(request, TCA_CT_LABELS_MASK,
action->ct.label_mask);
}
if (action->ct.mark_mask) {
nl_msg_put_u32(request, TCA_CT_MARK,
action->ct.mark);
nl_msg_put_u32(request, TCA_CT_MARK_MASK,
action->ct.mark_mask);
}
if (action->ct.commit) {
ct_action = TCA_CT_ACT_COMMIT;
if (action->ct.force) {
ct_action |= TCA_CT_ACT_FORCE;
}
}
if (action->ct.nat_type) {
ct_action |= TCA_CT_ACT_NAT;
if (action->ct.nat_type == TC_NAT_SRC) {
ct_action |= TCA_CT_ACT_NAT_SRC;
} else if (action->ct.nat_type == TC_NAT_DST) {
ct_action |= TCA_CT_ACT_NAT_DST;
}
if (action->ct.range.ip_family == AF_INET) {
nl_msg_put_be32(request, TCA_CT_NAT_IPV4_MIN,
action->ct.range.ipv4.min);
if (action->ct.range.ipv4.max) {
nl_msg_put_be32(request, TCA_CT_NAT_IPV4_MAX,
action->ct.range.ipv4.max);
}
} else if (action->ct.range.ip_family == AF_INET6) {
nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MIN,
&action->ct.range.ipv6.min);
if (ipv6_addr_is_set(&action->ct.range.ipv6.max)) {
nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MAX,
&action->ct.range.ipv6.max);
}
}
if (action->ct.range.port.min) {
nl_msg_put_be16(request, TCA_CT_NAT_PORT_MIN,
action->ct.range.port.min);
if (action->ct.range.port.max) {
nl_msg_put_be16(request, TCA_CT_NAT_PORT_MAX,
action->ct.range.port.max);
}
}
}
} else {
ct_action = TCA_CT_ACT_CLEAR;
}
nl_msg_put_u16(request, TCA_CT_ACTION, ct_action);
nl_msg_put_unspec(request, TCA_CT_PARMS, &ct, sizeof ct);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_skbedit_to_host(struct ofpbuf *request)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "skbedit");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_skbedit s = { .action = TC_ACT_PIPE };
nl_msg_put_unspec(request, TCA_SKBEDIT_PARMS, &s, sizeof s);
nl_msg_put_be16(request, TCA_SKBEDIT_PTYPE, PACKET_HOST);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_mirred(struct ofpbuf *request, int ifindex, int action,
int eaction)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "mirred");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_mirred m = { .action = action,
.eaction = eaction,
.ifindex = ifindex };
nl_msg_put_unspec(request, TCA_MIRRED_PARMS, &m, sizeof m);
}
nl_msg_end_nested(request, offset);
}
static inline void
nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
if (ck->len) {
nl_msg_put_unspec(request, TCA_ACT_COOKIE, ck->data, ck->len);
}
}
static inline void
nl_msg_put_act_flags(struct ofpbuf *request) {
struct nla_bitfield32 act_flags = { TCA_ACT_FLAGS_NO_PERCPU_STATS,
TCA_ACT_FLAGS_NO_PERCPU_STATS };
nl_msg_put_unspec(request, TCA_ACT_FLAGS, &act_flags, sizeof act_flags);
}
/* Given a flower and a flower_key_to_pedit map entry, calculates the
 * parameters for the pedit action(s) rewriting that key member, where:
 *
 * mask, data - pointers to the first full word of flower->mask/key to read.
 * cur_offset - the packet offset to use for the first pedit action.
 * cnt - the number of pedit actions (whole words) needed.
 * first_word_mask/last_word_mask - the masks to apply to the first/last
 * words read (as reads always cover entire words). */
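/* Worked example (a sketch): for the map entry rewriting the IPv4 TOS
 * byte (offset 1, size 1, boundary_shift 0):
 *
 *   max_offset = 1 + 1 = 2            start_offset = ROUND_DOWN(1, 4) = 0
 *   total_size = 2                    cnt = 1
 *   right_zero_bits = 8 * (4 - 2) = 16  ->  last_word_mask  = 0xffff0000
 *   left_zero_bits  = 8 * (1 - 0) = 8   ->  first_word_mask = 0x00ffffff
 *
 * Mask values are shown in host byte order, before htonl().  Combined,
 * the masks select only the byte at packet offset 1 (the TOS field)
 * within the single word read. */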
static void
calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m,
int *cur_offset, int *cnt, ovs_be32 *last_word_mask,
ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data)
{
int start_offset, max_offset, total_size;
int diff, right_zero_bits, left_zero_bits;
char *rewrite_key = (void *) &action->rewrite.key;
char *rewrite_mask = (void *) &action->rewrite.mask;
max_offset = m->offset + m->size;
start_offset = ROUND_DOWN(m->offset, 4);
diff = m->offset - start_offset;
total_size = max_offset - start_offset;
right_zero_bits = 8 * (4 - ((max_offset % 4) ? : 4));
left_zero_bits = 8 * (m->offset - start_offset);
*cur_offset = start_offset;
*cnt = (total_size / 4) + (total_size % 4 ? 1 : 0);
*last_word_mask = htonl(UINT32_MAX << right_zero_bits);
*first_word_mask = htonl(UINT32_MAX >> left_zero_bits);
*data = (void *) (rewrite_key + m->flower_offset - diff);
*mask = (void *) (rewrite_mask + m->flower_offset - diff);
}
static inline int
csum_update_flag(struct tc_flower *flower,
enum pedit_header_type htype) {
/* Explicitly specify the csum flags so the HW can return EOPNOTSUPP
 * if it doesn't support checksum recalculation of some headers.
 * Also, since OVS allows a flow such as
 * eth(dst=<mac>),eth_type(0x0800) actions=set(ipv4(src=<new_ip>))
 * we need to force a more specific match, as such a rewrite may,
 * for example, require an ICMP checksum recalculation if the packet
 * is ICMPv6, or a TCP checksum recalculation if it is TCP. */
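/* E.g. a rewrite of an IPv4 header field in a flow keyed on
 * ip_proto == IPPROTO_TCP accumulates TCA_CSUM_UPDATE_FLAG_IPV4HDR |
 * TCA_CSUM_UPDATE_FLAG_TCP and forces a full ip_proto mask via
 * needs_full_ip_proto_mask. */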
switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
/* Fall through. */
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
if (flower->key.ip_proto == IPPROTO_TCP) {
flower->needs_full_ip_proto_mask = true;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
} else if (flower->key.ip_proto == IPPROTO_UDP) {
flower->needs_full_ip_proto_mask = true;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
} else if (flower->key.ip_proto == IPPROTO_ICMP) {
flower->needs_full_ip_proto_mask = true;
} else if (flower->key.ip_proto == IPPROTO_ICMPV6) {
flower->needs_full_ip_proto_mask = true;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_ICMP;
} else {
VLOG_WARN_RL(&error_rl,
"can't offload rewrite of IP/IPV6 with ip_proto: %d",
flower->key.ip_proto);
break;
}
/* Fall through. */
case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
return 0; /* success */
case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
case __PEDIT_HDR_TYPE_MAX:
default:
break;
}
return EOPNOTSUPP;
}
static int
nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
struct tc_flower *flower,
struct tc_action *action)
{
struct {
struct tc_pedit sel;
struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
} sel = {
.sel = {
.nkeys = 0
}
};
int i, j, err;
for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
struct flower_key_to_pedit *m = &flower_pedit_map[i];
struct tc_pedit_key *pedit_key = NULL;
struct tc_pedit_key_ex *pedit_key_ex = NULL;
ovs_be32 *mask, *data, first_word_mask, last_word_mask;
int cnt = 0, cur_offset = 0;
if (!m->size) {
continue;
}
calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask,
&first_word_mask, &mask, &data);
for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) {
ovs_be32 mask_word = *mask;
ovs_be32 data_word = *data;
if (j == 0) {
mask_word &= first_word_mask;
}
if (j == cnt - 1) {
mask_word &= last_word_mask;
}
if (!mask_word) {
continue;
}
if (sel.sel.nkeys == MAX_PEDIT_OFFSETS) {
VLOG_WARN_RL(&error_rl, "reached too many pedit offsets: %d",
MAX_PEDIT_OFFSETS);
return EOPNOTSUPP;
}
pedit_key = &sel.keys[sel.sel.nkeys];
pedit_key_ex = &sel.keys_ex[sel.sel.nkeys];
pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET;
pedit_key_ex->htype = m->htype;
pedit_key->off = cur_offset;
mask_word = htonl(ntohl(mask_word) >> m->boundary_shift);
data_word = htonl(ntohl(data_word) >> m->boundary_shift);
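/* The kernel's act_pedit applies each key as
 * new = (old & key->mask) ^ key->val, so the bits to rewrite are
 * cleared in the stored mask and pre-masked into the stored value. */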
pedit_key->mask = ~mask_word;
pedit_key->val = data_word & mask_word;
sel.sel.nkeys++;
err = csum_update_flag(flower, m->htype);
if (err) {
return err;
}
if (flower->needs_full_ip_proto_mask) {
flower->mask.ip_proto = UINT8_MAX;
}
}
}
nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex);
return 0;
}
/* When a flow matches on tunnel metadata, the tunnel_key unset
 * (release) action must be emitted before the first tunnel_key set
 * action as well as before the output ports, so that stale tunnel
 * metadata is not carried into them.  The 'released' flag in
 * nl_msg_put_flower_acts() ensures the release is added only once. */
static void
nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index)
{
size_t act_offset;
act_offset = nl_msg_start_nested(request, act_index);
nl_msg_put_act_tunnel_key_release(request);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
/* Each pedit action accumulates its checksum-update flags into
 * flower->csum_update_flags; this appends a single csum action,
 * carrying all of them, after the last pedit action in a consecutive
 * series. */
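/* E.g. two consecutive pedit actions followed by an output are emitted
 * as: pedit, pedit, csum(<accumulated flags>), mirred. */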
static void
nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower,
uint16_t *act_index)
{
size_t act_offset;
/* No pedit actions, or their csum flags were already emitted. */
if (!flower->csum_update_flags) {
return;
}
act_offset = nl_msg_start_nested(request, (*act_index)++);
nl_msg_put_act_csum(request, flower->csum_update_flags);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
/* Clear the flags, so another series of pedit actions can follow. */
flower->csum_update_flags = 0;
}
static int
nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
{
bool ingress, released = false;
size_t offset;
size_t act_offset;
uint16_t act_index = 1;
struct tc_action *action;
int i, ifindex = 0;
offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
{
int error;
action = flower->actions;
for (i = 0; i < flower->action_count; i++, action++) {
if (action->type != TC_ACT_PEDIT) {
nl_msg_put_csum_act(request, flower, &act_index);
}
switch (action->type) {
case TC_ACT_PEDIT: {
act_offset = nl_msg_start_nested(request, act_index++);
error = nl_msg_put_flower_rewrite_pedits(request, flower,
action);
if (error) {
return error;
}
nl_msg_end_nested(request, act_offset);
if (i == flower->action_count - 1) {
/* If this is the last action check csum calc again. */
nl_msg_put_csum_act(request, flower, &act_index);
}
}
break;
case TC_ACT_ENCAP: {
if (!released && flower->tunnel) {
nl_msg_put_flower_acts_release(request, act_index++);
released = true;
}
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_tunnel_key_set(request, action->encap.id_present,
action->encap.id,
action->encap.ipv4.ipv4_src,
action->encap.ipv4.ipv4_dst,
&action->encap.ipv6.ipv6_src,
&action->encap.ipv6.ipv6_dst,
action->encap.tp_dst,
action->encap.tos,
action->encap.ttl,
action->encap.data,
action->encap.no_csum);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_VLAN_POP: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_pop_vlan(request);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_VLAN_PUSH: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_push_vlan(request,
action->vlan.vlan_push_tpid,
action->vlan.vlan_push_id,
action->vlan.vlan_push_prio);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_MPLS_POP: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_pop_mpls(request, action->mpls.proto);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_MPLS_PUSH: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_push_mpls(request, action->mpls.proto,
action->mpls.label, action->mpls.tc,
action->mpls.ttl, action->mpls.bos);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_MPLS_SET: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_set_mpls(request, action->mpls.label,
action->mpls.tc, action->mpls.ttl,
action->mpls.bos);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_OUTPUT: {
if (!released && flower->tunnel) {
nl_msg_put_flower_acts_release(request, act_index++);
released = true;
}
ingress = action->out.ingress;
ifindex = action->out.ifindex_out;
if (ifindex < 1) {
VLOG_ERR_RL(&error_rl, "%s: invalid ifindex: %d, type: %d",
__func__, ifindex, action->type);
return EINVAL;
}
if (ingress) {
/* If redirecting to ingress (internal port) ensure
* pkt_type on skb is set to PACKET_HOST. */
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_skbedit_to_host(request);
nl_msg_end_nested(request, act_offset);
}
act_offset = nl_msg_start_nested(request, act_index++);
if (i == flower->action_count - 1) {
if (ingress) {
nl_msg_put_act_mirred(request, ifindex, TC_ACT_STOLEN,
TCA_INGRESS_REDIR);
} else {
nl_msg_put_act_mirred(request, ifindex, TC_ACT_STOLEN,
TCA_EGRESS_REDIR);
}
} else {
if (ingress) {
nl_msg_put_act_mirred(request, ifindex, TC_ACT_PIPE,
TCA_INGRESS_MIRROR);
} else {
nl_msg_put_act_mirred(request, ifindex, TC_ACT_PIPE,
TCA_EGRESS_MIRROR);
}
}
nl_msg_put_act_cookie(request, &flower->act_cookie);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_GOTO: {
if (released) {
/* We don't support tunnel release + output + goto
 * for now, as the next chain by default will try to match
 * the tunnel metadata that was released/unset.
 *
 * This will happen with tunnel + mirror ports.
 */
return EOPNOTSUPP;
}
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_gact(request, action->chain);
nl_msg_put_act_cookie(request, &flower->act_cookie);
nl_msg_end_nested(request, act_offset);
}
break;
case TC_ACT_CT: {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_ct(request, action);
nl_msg_put_act_cookie(request, &flower->act_cookie);
nl_msg_end_nested(request, act_offset);
}
break;
}
}
}
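/* A flower rule without any actions gets an explicit drop action:
 * gact with chain == 0 sets TC_ACT_SHOT. */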
if (!flower->action_count) {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_gact(request, 0);
nl_msg_put_act_cookie(request, &flower->act_cookie);
nl_msg_put_act_flags(request);
nl_msg_end_nested(request, act_offset);
}
nl_msg_end_nested(request, offset);
return 0;
}
static void
nl_msg_put_masked_value(struct ofpbuf *request, uint16_t type,
uint16_t mask_type, const void *data,
const void *mask_data, size_t len)
{
if (mask_type != TCA_FLOWER_UNSPEC) {
if (is_all_zeros(mask_data, len)) {
return;
}
nl_msg_put_unspec(request, mask_type, mask_data, len);
}
nl_msg_put_unspec(request, type, data, len);
}
static void
nl_msg_put_flower_tunnel_opts(struct ofpbuf *request, uint16_t type,
struct tun_metadata metadata)
{
struct geneve_opt *opt;
size_t outer, inner;
int len, cnt = 0;
len = metadata.present.len;
if (!len) {
return;
}
outer = nl_msg_start_nested(request, type);
while (len) {
opt = &metadata.opts.gnv[cnt];
inner = nl_msg_start_nested(request, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
opt->opt_class);
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, opt->type);
nl_msg_put_unspec(request, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, opt + 1,
opt->length * 4);
cnt += sizeof(struct geneve_opt) / 4 + opt->length;
len -= sizeof(struct geneve_opt) + opt->length * 4;
nl_msg_end_nested(request, inner);
}
nl_msg_end_nested(request, outer);
}
static void
nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
{
ovs_be32 ipv4_src_mask = flower->mask.tunnel.ipv4.ipv4_src;
ovs_be32 ipv4_dst_mask = flower->mask.tunnel.ipv4.ipv4_dst;
ovs_be32 ipv4_src = flower->key.tunnel.ipv4.ipv4_src;
ovs_be32 ipv4_dst = flower->key.tunnel.ipv4.ipv4_dst;
struct in6_addr *ipv6_src_mask = &flower->mask.tunnel.ipv6.ipv6_src;
struct in6_addr *ipv6_dst_mask = &flower->mask.tunnel.ipv6.ipv6_dst;
struct in6_addr *ipv6_src = &flower->key.tunnel.ipv6.ipv6_src;
struct in6_addr *ipv6_dst = &flower->key.tunnel.ipv6.ipv6_dst;
ovs_be16 tp_dst = flower->key.tunnel.tp_dst;
ovs_be32 id = be64_to_be32(flower->key.tunnel.id);
uint8_t tos = flower->key.tunnel.tos;
uint8_t ttl = flower->key.tunnel.ttl;
uint8_t tos_mask = flower->mask.tunnel.tos;
uint8_t ttl_mask = flower->mask.tunnel.ttl;
ovs_be64 id_mask = flower->mask.tunnel.id;
if (ipv4_dst_mask || ipv4_src_mask) {
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
ipv4_dst_mask);
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
ipv4_src_mask);
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, ipv4_dst);
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src);
} else if (ipv6_addr_is_set(ipv6_dst_mask) ||
ipv6_addr_is_set(ipv6_src_mask)) {
nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
ipv6_dst_mask);
nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
ipv6_src_mask);
nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST, ipv6_dst);
nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src);
}
if (tos_mask) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TOS, tos);
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TOS_MASK, tos_mask);
}
if (ttl_mask) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL, ttl);
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL_MASK, ttl_mask);
}
if (tp_dst) {
nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst);
}
if (id_mask) {
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id);
}
nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS,
flower->key.tunnel.metadata);
nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS_MASK,
flower->mask.tunnel.metadata);
}
#define FLOWER_PUT_MASKED_VALUE(member, type) \
nl_msg_put_masked_value(request, type, type##_MASK, &flower->key.member, \
&flower->mask.member, sizeof flower->key.member)
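/* For example, FLOWER_PUT_MASKED_VALUE(ip_ttl, TCA_FLOWER_KEY_IP_TTL)
 * emits TCA_FLOWER_KEY_IP_TTL and TCA_FLOWER_KEY_IP_TTL_MASK from
 * flower->key.ip_ttl and flower->mask.ip_ttl, and emits nothing at all
 * when the mask is all zeros. */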
static int
nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
{
uint16_t host_eth_type = ntohs(flower->key.eth_type);
bool is_vlan = eth_type_vlan(flower->key.eth_type);
bool is_qinq = is_vlan && eth_type_vlan(flower->key.encap_eth_type[0]);
bool is_mpls = eth_type_mpls(flower->key.eth_type);
enum tc_offload_policy policy = flower->tc_policy;
int err;
/* The actions must be serialized first, as some actions require
 * tightening the match; see csum_update_flag(). */
err = nl_msg_put_flower_acts(request, flower);
if (err) {
return err;
}
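/* For VLAN flows, the L3 ethertype used for the matches below is the
 * encapsulated one: encap_eth_type[1] for QinQ, otherwise
 * encap_eth_type[0]; MPLS likewise uses encap_eth_type[0]. */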
if (is_vlan) {
if (is_qinq) {
host_eth_type = ntohs(flower->key.encap_eth_type[1]);
} else {
host_eth_type = ntohs(flower->key.encap_eth_type[0]);
}
}
if (is_mpls) {
host_eth_type = ntohs(flower->key.encap_eth_type[0]);
}
FLOWER_PUT_MASKED_VALUE(dst_mac, TCA_FLOWER_KEY_ETH_DST);
FLOWER_PUT_MASKED_VALUE(src_mac, TCA_FLOWER_KEY_ETH_SRC);
if (host_eth_type == ETH_P_ARP) {
FLOWER_PUT_MASKED_VALUE(arp.spa, TCA_FLOWER_KEY_ARP_SIP);
FLOWER_PUT_MASKED_VALUE(arp.tpa, TCA_FLOWER_KEY_ARP_TIP);
FLOWER_PUT_MASKED_VALUE(arp.sha, TCA_FLOWER_KEY_ARP_SHA);
FLOWER_PUT_MASKED_VALUE(arp.tha, TCA_FLOWER_KEY_ARP_THA);
FLOWER_PUT_MASKED_VALUE(arp.opcode, TCA_FLOWER_KEY_ARP_OP);
}
if (host_eth_type == ETH_P_IP || host_eth_type == ETH_P_IPV6) {
FLOWER_PUT_MASKED_VALUE(ip_ttl, TCA_FLOWER_KEY_IP_TTL);
FLOWER_PUT_MASKED_VALUE(ip_tos, TCA_FLOWER_KEY_IP_TOS);
if (flower->mask.ip_proto && flower->key.ip_proto) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_IP_PROTO,
flower->key.ip_proto);
}
if (flower->mask.flags) {
nl_msg_put_be32(request, TCA_FLOWER_KEY_FLAGS,
htonl(flower->key.flags));
nl_msg_put_be32(request, TCA_FLOWER_KEY_FLAGS_MASK,
htonl(flower->mask.flags));
}
if (flower->key.ip_proto == IPPROTO_UDP) {
FLOWER_PUT_MASKED_VALUE(udp_src, TCA_FLOWER_KEY_UDP_SRC);
FLOWER_PUT_MASKED_VALUE(udp_dst, TCA_FLOWER_KEY_UDP_DST);
} else if (flower->key.ip_proto == IPPROTO_TCP) {
FLOWER_PUT_MASKED_VALUE(tcp_src, TCA_FLOWER_KEY_TCP_SRC);
FLOWER_PUT_MASKED_VALUE(tcp_dst, TCA_FLOWER_KEY_TCP_DST);
FLOWER_PUT_MASKED_VALUE(tcp_flags, TCA_FLOWER_KEY_TCP_FLAGS);
} else if (flower->key.ip_proto == IPPROTO_SCTP) {
FLOWER_PUT_MASKED_VALUE(sctp_src, TCA_FLOWER_KEY_SCTP_SRC);
FLOWER_PUT_MASKED_VALUE(sctp_dst, TCA_FLOWER_KEY_SCTP_DST);
} else if (flower->key.ip_proto == IPPROTO_ICMP) {
FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV4_CODE);
FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV4_TYPE);
} else if (flower->key.ip_proto == IPPROTO_ICMPV6) {
FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE);
FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE);
}
FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE);
FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE);
FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK);
FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS);
}
if (host_eth_type == ETH_P_IP) {
FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC);
FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST);
} else if (host_eth_type == ETH_P_IPV6) {
FLOWER_PUT_MASKED_VALUE(ipv6.ipv6_src, TCA_FLOWER_KEY_IPV6_SRC);
FLOWER_PUT_MASKED_VALUE(ipv6.ipv6_dst, TCA_FLOWER_KEY_IPV6_DST);
}
nl_msg_put_be16(request, TCA_FLOWER_KEY_ETH_TYPE, flower->key.eth_type);
if (is_mpls) {
if (mpls_lse_to_ttl(flower->mask.mpls_lse)) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_MPLS_TTL,
mpls_lse_to_ttl(flower->key.mpls_lse));
}
if (mpls_lse_to_tc(flower->mask.mpls_lse)) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_MPLS_TC,
mpls_lse_to_tc(flower->key.mpls_lse));
}
if (mpls_lse_to_bos(flower->mask.mpls_lse)) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_MPLS_BOS,
mpls_lse_to_bos(flower->key.mpls_lse));
}
if (mpls_lse_to_label(flower->mask.mpls_lse)) {
nl_msg_put_u32(request, TCA_FLOWER_KEY_MPLS_LABEL,
mpls_lse_to_label(flower->key.mpls_lse));
}
}
if (is_vlan) {
if (flower->mask.vlan_id[0]) {
nl_msg_put_u16(request, TCA_FLOWER_KEY_VLAN_ID,
flower->key.vlan_id[0]);
}
if (flower->mask.vlan_prio[0]) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_VLAN_PRIO,
flower->key.vlan_prio[0]);
}
if (flower->key.encap_eth_type[0]) {
nl_msg_put_be16(request, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
flower->key.encap_eth_type[0]);
}
if (is_qinq) {
if (flower->mask.vlan_id[1]) {
nl_msg_put_u16(request, TCA_FLOWER_KEY_CVLAN_ID,
flower->key.vlan_id[1]);
}
if (flower->mask.vlan_prio[1]) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_CVLAN_PRIO,
flower->key.vlan_prio[1]);
}
if (flower->key.encap_eth_type[1]) {
nl_msg_put_be16(request, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
flower->key.encap_eth_type[1]);
}
}
}
if (policy == TC_POLICY_NONE) {
policy = tc_policy;
}
nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(policy));
if (flower->tunnel) {
nl_msg_put_flower_tunnel(request, flower);
}
return 0;
}
static void
log_tc_flower_match(const char *msg,
const struct tc_flower *a,
const struct tc_flower *b)
{
uint8_t key_a[sizeof(struct tc_flower_key)];
uint8_t key_b[sizeof(struct tc_flower_key)];
struct ds s = DS_EMPTY_INITIALIZER;
for (int i = 0; i < sizeof a->key; i++) {
uint8_t mask_a = ((uint8_t *) &a->mask)[i];
uint8_t mask_b = ((uint8_t *) &b->mask)[i];
key_a[i] = ((uint8_t *) &a->key)[i] & mask_a;
key_b[i] = ((uint8_t *) &b->key)[i] & mask_b;
}
ds_put_cstr(&s, "\nExpected Mask:\n");
ds_put_hex(&s, &a->mask, sizeof a->mask);
ds_put_cstr(&s, "\nReceived Mask:\n");
ds_put_hex(&s, &b->mask, sizeof b->mask);
ds_put_cstr(&s, "\nExpected Key:\n");
ds_put_hex(&s, &a->key, sizeof a->key);
ds_put_cstr(&s, "\nReceived Key:\n");
ds_put_hex(&s, &b->key, sizeof b->key);
ds_put_cstr(&s, "\nExpected Masked Key:\n");
ds_put_hex(&s, key_a, sizeof key_a);
ds_put_cstr(&s, "\nReceived Masked Key:\n");
ds_put_hex(&s, key_b, sizeof key_b);
if (a->action_count != b->action_count) {
/* If action count is not equal, we print all actions to see which
* ones are missing. */
const struct tc_action *action;
int i;
ds_put_cstr(&s, "\nExpected Actions:\n");
for (i = 0, action = a->actions; i < a->action_count; i++, action++) {
ds_put_cstr(&s, " - ");
ds_put_hex(&s, action, sizeof *action);
ds_put_cstr(&s, "\n");
}
ds_put_cstr(&s, "Received Actions:\n");
for (i = 0, action = b->actions; i < b->action_count; i++, action++) {
ds_put_cstr(&s, " - ");
ds_put_hex(&s, action, sizeof *action);
ds_put_cstr(&s, "\n");
}
} else {
/* Only dump the delta in actions. */
const struct tc_action *action_a = a->actions;
const struct tc_action *action_b = b->actions;
for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
if (memcmp(action_a, action_b, sizeof *action_a)) {
ds_put_format(&s,
"\nAction %d mismatch:\n - Expected Action: ",
i);
ds_put_hex(&s, action_a, sizeof *action_a);
ds_put_cstr(&s, "\n - Received Action: ");
ds_put_hex(&s, action_b, sizeof *action_b);
}
}
}
VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&s));
ds_destroy(&s);
}
static bool
cmp_tc_flower_match_action(const struct tc_flower *a,
const struct tc_flower *b)
{
if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
log_tc_flower_match("tc flower compare failed mask compare:", a, b);
return false;
}
/* We cannot memcmp() the keys directly, as key bits may be set while
 * the corresponding mask bits are not. */
for (int i = 0; i < sizeof a->key; i++) {
uint8_t mask = ((uint8_t *)&a->mask)[i];
uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
if (key_a != key_b) {
log_tc_flower_match("tc flower compare failed masked key compare:",
a, b);
return false;
}
}
/* Compare the actions. */
const struct tc_action *action_a = a->actions;
const struct tc_action *action_b = b->actions;
if (a->action_count != b->action_count) {
log_tc_flower_match("tc flower compare failed action length check",
a, b);
return false;
}
for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
if (memcmp(action_a, action_b, sizeof *action_a)) {
log_tc_flower_match("tc flower compare failed action compare",
a, b);
return false;
}
}
return true;
}
int
tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
{
struct ofpbuf request;
struct ofpbuf *reply;
int error = 0;
size_t basic_offset;
uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
request_from_tcf_id(id, eth_type, RTM_NEWTFILTER,
NLM_F_CREATE | NLM_F_ECHO, &request);
nl_msg_put_string(&request, TCA_KIND, "flower");
basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
{
error = nl_msg_put_flower_options(&request, flower);
if (error) {
ofpbuf_uninit(&request);
return error;
}
}
nl_msg_end_nested(&request, basic_offset);
error = tc_transact(&request, &reply);
if (!error) {
struct tcmsg *tc =
ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
id->prio = tc_get_major(tc->tcm_info);
id->handle = tc->tcm_handle;
if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
struct tc_flower flower_out;
struct tcf_id id_out;
int ret;
ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
false);
if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
"not match request! Set dpif_netlink to dbg to "
"see which rule caused this error.");
}
}
ofpbuf_delete(reply);
}
return error;
}
void
tc_set_policy(const char *policy)
{
if (!policy) {
return;
}
if (!strcmp(policy, "skip_sw")) {
tc_policy = TC_POLICY_SKIP_SW;
} else if (!strcmp(policy, "skip_hw")) {
tc_policy = TC_POLICY_SKIP_HW;
} else if (!strcmp(policy, "none")) {
tc_policy = TC_POLICY_NONE;
} else {
VLOG_WARN("tc: Invalid policy '%s'", policy);
return;
}
VLOG_INFO("tc: Using policy '%s'", policy);
}