/* Source: ovs/lib/tc.h (416 lines, 10 KiB, C), taken from a mirror of
 * https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00. */

/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
* Copyright (c) 2016 Mellanox Technologies, Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TC_H
#define TC_H 1
#include <sys/types.h>
#include <netinet/in.h> /* Must happen before linux/pkt_cls.h - Glibc #20215 */
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include "netlink.h"
#include "netlink-socket.h"
#include "odp-netlink.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/flow.h"
#include "openvswitch/tun-metadata.h"
/* For backwards compatibility with older kernels: each constant below is
 * only defined here when the included kernel headers did not already
 * provide it. */
#ifndef TC_H_CLSACT
#define TC_H_CLSACT TC_H_INGRESS
#endif
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xFFF2U
#endif
#ifndef TC_H_MIN_EGRESS
#define TC_H_MIN_EGRESS 0xFFF3U
#endif
/* Parent handles combining TC_H_CLSACT with the ingress and egress minor
 * numbers, respectively. */
#define TC_INGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)
#define TC_EGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)
/* Name of the default policy.
 * NOTE(review): presumably a value accepted by tc_set_policy() - confirm. */
#define TC_POLICY_DEFAULT "none"
/* Reserved flower priorities.
 *
 * __TC_RESERVED_PRIORITY_MAX is a sentinel and must remain the last
 * enumerator: TC_RESERVED_PRIORITY_MAX is derived from it and therefore
 * always equals the last real entry. */
enum tc_flower_reserved_prio {
    TC_RESERVED_PRIORITY_NONE,
    TC_RESERVED_PRIORITY_POLICE,
    TC_RESERVED_PRIORITY_IPV4,
    TC_RESERVED_PRIORITY_IPV6,
    __TC_RESERVED_PRIORITY_MAX
};
#define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1)
/* Hook selector.  It is stored in struct tcf_id and taken as an argument by
 * tc_add_del_qdisc() and the tc_make_tcf_id*() helpers. */
enum tc_qdisc_hook {
    TC_INGRESS,     /* First enumerator (value 0). */
    TC_EGRESS,      /* Second enumerator (value 1). */
};
/* Inclusive lower and upper bounds of the index range that
 * tc_is_meter_index() accepts. */
#define METER_POLICE_IDS_BASE 0x10000000
#define METER_POLICE_IDS_MAX 0x1FFFFFFF

/* Returns true if 'index' lies within the closed interval
 * [METER_POLICE_IDS_BASE, METER_POLICE_IDS_MAX], otherwise false. */
static inline bool
tc_is_meter_index(uint32_t index)
{
    return index >= METER_POLICE_IDS_BASE && index <= METER_POLICE_IDS_MAX;
}
/* Builds the tc handle 'major':'minor'.
 *
 * 'major' is moved into the upper 16 bits before TC_H_MAKE() combines it
 * with 'minor'. */
static inline unsigned int
tc_make_handle(unsigned int major, unsigned int minor)
{
    unsigned int major_bits = major << 16;

    return TC_H_MAKE(major_bits, minor);
}
/* Extracts the major number from 'handle': the TC_H_MAJ() part shifted down
 * by 16 bits. */
static inline unsigned int
tc_get_major(unsigned int handle)
{
    unsigned int major_bits = TC_H_MAJ(handle);

    return major_bits >> 16;
}
/* Extracts the minor number from 'handle', as given by TC_H_MIN(). */
static inline unsigned int
tc_get_minor(unsigned int handle)
{
    unsigned int minor_bits = TC_H_MIN(handle);

    return minor_bits;
}
/* Request construction, transaction and qdisc helpers.
 *
 * Only the prototypes are visible in this header; the definitions live
 * elsewhere.
 * NOTE(review): return-value conventions (e.g. 0 or a positive errno value
 * for the 'int' functions) are not visible from here - confirm against the
 * implementation. */
struct tcmsg *tc_make_request(int ifindex, int type, unsigned int flags,
                              struct ofpbuf *);
struct tcamsg *tc_make_action_request(int type, unsigned int flags,
                                      struct ofpbuf *request);
int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
int tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id,
                     enum tc_qdisc_hook hook);
/* A read-only pointer paired with a length.
 * NOTE(review): 'len' is presumably the size in bytes of the buffer that
 * 'data' points to - confirm against users. */
struct tc_cookie {
    const void *data;
    size_t len;
};
/* GBP information carried in struct tc_flower_tunnel ('gbp' member). */
struct tc_tunnel_gbp {
    ovs_be16 id;
    uint8_t flags;
    bool id_present;    /* NOTE(review): presumably set when 'id' is valid -
                         * confirm. */
};
/* Tunnel fields; embedded as the 'tunnel' member of struct tc_flower_key.
 *
 * The IPv4 and IPv6 address pairs are separate members (not a union), so
 * both are always present in the structure. */
struct tc_flower_tunnel {
    struct {
        ovs_be32 ipv4_src;
        ovs_be32 ipv4_dst;
    } ipv4;
    struct {
        struct in6_addr ipv6_src;
        struct in6_addr ipv6_dst;
    } ipv6;
    uint8_t tos;
    uint8_t ttl;
    ovs_be16 tp_src;
    ovs_be16 tp_dst;
    struct tc_tunnel_gbp gbp;
    ovs_be64 id;
    struct tun_metadata metadata;
};
/* Set of flower fields.
 *
 * The same layout is used for both the 'key' and the 'mask' members of
 * struct tc_flower, and for the 'rewrite.key' / 'rewrite.mask' members of
 * struct tc_action.  Member order is part of the layout and is kept
 * unchanged. */
struct tc_flower_key {
    /* Ethernet and IP protocol. */
    ovs_be16 eth_type;
    uint8_t ip_proto;

    struct eth_addr dst_mac;
    struct eth_addr src_mac;

    ovs_be32 mpls_lse;

    /* Transport ports and flags. */
    ovs_be16 tcp_src;
    ovs_be16 tcp_dst;
    ovs_be16 tcp_flags;

    ovs_be16 udp_src;
    ovs_be16 udp_dst;

    ovs_be16 sctp_src;
    ovs_be16 sctp_dst;

    uint8_t icmp_code;
    uint8_t icmp_type;

    /* One entry per VLAN header, up to FLOW_MAX_VLAN_HEADERS. */
    uint16_t vlan_id[FLOW_MAX_VLAN_HEADERS];
    uint8_t vlan_prio[FLOW_MAX_VLAN_HEADERS];
    ovs_be16 encap_eth_type[FLOW_MAX_VLAN_HEADERS];

    uint8_t flags;
    uint8_t ip_ttl;
    uint8_t ip_tos;

    /* Connection tracking fields. */
    uint16_t ct_state;
    uint16_t ct_zone;
    uint32_t ct_mark;
    ovs_u128 ct_label;

    struct {
        ovs_be32 spa;
        ovs_be32 tpa;
        struct eth_addr sha;
        struct eth_addr tha;
        uint8_t opcode;
    } arp;

    struct {
        ovs_be32 ipv4_src;
        ovs_be32 ipv4_dst;
        uint8_t rewrite_ttl;
        uint8_t rewrite_tos;
    } ipv4;
    struct {
        struct in6_addr ipv6_src;
        struct in6_addr ipv6_dst;
        uint8_t rewrite_hlimit;
        uint8_t rewrite_tclass;
    } ipv6;

    struct tc_flower_tunnel tunnel;
};
/* Kind of a struct tc_action; stored in its 'type' member.
 *
 * Enumerators are numbered implicitly from 0 in declaration order, so the
 * order below must not change. */
enum tc_action_type {
    TC_ACT_OUTPUT,
    TC_ACT_ENCAP,
    TC_ACT_PEDIT,
    TC_ACT_VLAN_POP,
    TC_ACT_VLAN_PUSH,
    TC_ACT_MPLS_POP,
    TC_ACT_MPLS_PUSH,
    TC_ACT_MPLS_SET,
    TC_ACT_GOTO,
    TC_ACT_CT,
    TC_ACT_POLICE,
    TC_ACT_POLICE_MTU,
};
/* NAT mode.
 * NOTE(review): presumably the values stored in the 'ct.nat_type' member of
 * struct tc_action (declared there as uint8_t) - confirm. */
enum nat_type {
    TC_NO_NAT = 0,
    TC_NAT_SRC,
    TC_NAT_DST,
    TC_NAT_RESTORE,
};
/* A single action.
 *
 * 'type' gives the kind of action and the anonymous union holds the
 * per-kind parameters.
 * NOTE(review): which union member goes with which 'type' value is not
 * visible in this header - confirm against the code that fills it in.
 *
 * The 'rewrite' member must not be the last thing in the structure; a build
 * assertion that follows this definition depends on there being members
 * after it. */
struct tc_action {
    union {
        int chain;

        struct {
            int ifindex_out;
            bool ingress;
        } out;

        struct {
            ovs_be16 vlan_push_tpid;
            uint16_t vlan_push_id;
            uint8_t vlan_push_prio;
        } vlan;

        struct {
            ovs_be16 proto;
            uint32_t label;
            uint8_t tc;
            uint8_t ttl;
            uint8_t bos;
        } mpls;

        struct {
            bool id_present;
            ovs_be64 id;
            ovs_be16 tp_src;
            ovs_be16 tp_dst;
            uint8_t tos;
            uint8_t ttl;
            uint8_t no_csum;
            struct {
                ovs_be32 ipv4_src;
                ovs_be32 ipv4_dst;
            } ipv4;
            struct {
                struct in6_addr ipv6_src;
                struct in6_addr ipv6_dst;
            } ipv6;
            struct tun_metadata data;
        } encap;

        struct {
            uint16_t zone;
            uint32_t mark;
            uint32_t mark_mask;
            ovs_u128 label;
            ovs_u128 label_mask;
            uint8_t nat_type;
            struct {
                uint8_t ip_family;
                /* Address range; the IPv4 and IPv6 forms share storage. */
                union {
                    struct {
                        ovs_be32 min;
                        ovs_be32 max;
                    } ipv4;
                    struct {
                        struct in6_addr min;
                        struct in6_addr max;
                    } ipv6;
                };
                struct {
                    ovs_be16 min;
                    ovs_be16 max;
                } port;
            } range;
            bool clear;
            bool force;
            bool commit;
        } ct;

        struct {
            struct tc_flower_key key;
            struct tc_flower_key mask;
        } rewrite;

        struct {
            uint32_t index;
            uint32_t result_jump;
            uint16_t mtu;
        } police;
    };

    enum tc_action_type type;
    uint32_t jump_action;
/* NOTE(review): presumably a special 'jump_action' value - confirm. */
#define JUMP_ACTION_STOP 0xffffffff
};
/* Assert that a masked uint32_t write starting at the last byte of
 * 'rewrite' in struct tc_action, although it runs past the end of
 * 'rewrite', still lands inside struct tc_action.
 *
 * This should only fail if someone moves 'rewrite' to the end of
 * struct tc_action. */
BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite)
                  + MEMBER_SIZEOF(struct tc_action, rewrite)
                  + sizeof(uint32_t) - 2 < sizeof(struct tc_action));
/* Offload state; stored in the 'offloaded_state' member of
 * struct tc_flower.  TC_OFFLOADED_STATE_UNDEFINED is the zero value. */
enum tc_offloaded_state {
    TC_OFFLOADED_STATE_UNDEFINED,
    TC_OFFLOADED_STATE_IN_HW,
    TC_OFFLOADED_STATE_NOT_IN_HW,
};
/* Number of elements in the 'actions' array of struct tc_flower. */
#define TCA_ACT_MAX_NUM 16
/* Filter identifier passed to the tc_*() filter functions declared in this
 * header.  is_tcf_id_eq() compares all six members. */
struct tcf_id {
    enum tc_qdisc_hook hook;
    uint32_t block_id;
    int ifindex;
    uint32_t chain;
    uint16_t prio;
    uint32_t handle;
};
static inline struct tcf_id
tc_make_tcf_id(int ifindex, uint32_t block_id, uint16_t prio,
enum tc_qdisc_hook hook)
{
struct tcf_id id = {
.hook = hook,
.block_id = block_id,
.ifindex = ifindex,
.prio = prio,
};
return id;
}
static inline struct tcf_id
tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
uint16_t prio, enum tc_qdisc_hook hook)
{
struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
id.chain = chain;
return id;
}
static inline bool
is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2)
{
return id1->prio == id2->prio
&& id1->handle == id2->handle
&& id1->hook == id2->hook
&& id1->block_id == id2->block_id
&& id1->ifindex == id2->ifindex
&& id1->chain == id2->chain;
}
/* Offload policy; stored in the 'tc_policy' member of struct tc_flower.
 * TC_POLICY_NONE is explicitly the zero value. */
enum tc_offload_policy {
    TC_POLICY_NONE = 0,
    TC_POLICY_SKIP_SW,
    TC_POLICY_SKIP_HW,
};
/* Compile-time check that TC_POLICY_NONE is zero.
 * NOTE(review): presumably so that a zero-initialized 'tc_policy' means
 * "no policy" - confirm against users. */
BUILD_ASSERT_DECL(TC_POLICY_NONE == 0);
struct tc_flower {
struct tc_flower_key key;
struct tc_flower_key mask;
int action_count;
struct tc_action actions[TCA_ACT_MAX_NUM];
tc: Fix stats byte count on fragmented packets. Fragmented packets with offset=0 are defragmented by tc act_ct, and only when assembled pass to next action, in ovs offload case, a goto action. Since stats are overwritten on each action dump, only the stats for last action in the tc filter action priority list is taken, the stats on the goto action, which count only the assembled packets. See below for example. Hardware updates just part of the actions (gact, ct, mirred) - those that support stats_update() operation. Since datapath rules end with either an output (mirred) or recirc/drop (both gact), tc rule will at least have one action that supports it. For software packets, the first action will have the max software packets count. Tc dumps total packets (hw + sw) and hardware packets, then software packets needs to be calculated from this (total - hw). To fix the above, get hardware packets and calculate software packets for each action, take the max of each set, then combine back to get the total packets that went through software and hardware. Example by running ping above MTU (ping <IP> -s 2000): ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=first), packets:14, bytes:19544,..., actions:ct(zone=1),recirc(0x1) ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=later), packets:14, bytes:28392,..., actions:ct(zone=1),recirc(0x1) Second rule should have had bytes=14*<size of 'later' frag>, but instead it's bytes=14*<size of assembled packets - size of 'first' + 'later' frags>. Fixes: 576126a931cd ("netdev-offload-tc: Add conntrack support") Signed-off-by: Paul Blakey <paulb@nvidia.com> Reviewed-by: Roi Dayan <roid@nvidia.com> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-11-07 14:12:35 +02:00
struct ovs_flow_stats stats_sw;
struct ovs_flow_stats stats_hw;
uint64_t lastused;
uint32_t csum_update_flags;
bool tunnel;
struct tc_cookie act_cookie;
bool needs_full_ip_proto_mask;
enum tc_offloaded_state offloaded_state;
/* Used to force skip_hw when probing tc features. */
enum tc_offload_policy tc_policy;
};
/* Flower filter and chain operations.
 *
 * Only the prototypes are visible in this header; the definitions live
 * elsewhere.
 * NOTE(review): the meaning of the 'int' return values (presumably 0 on
 * success, otherwise an error code) is not visible from here - confirm
 * against the implementation. */
int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower);
int tc_del_filter(struct tcf_id *id, const char *kind);
int tc_del_flower_filter(struct tcf_id *id);
int tc_get_flower(struct tcf_id *id, struct tc_flower *flower);
int tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse);
int tc_dump_tc_chain_start(struct tcf_id *id, struct nl_dump *dump);
int parse_netlink_to_tc_flower(struct ofpbuf *reply,
                               struct tcf_id *id,
                               struct tc_flower *flower,
                               bool terse);
int parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain);
void tc_set_policy(const char *policy);
/* Action statistics and policer helpers.
 *
 * Only the prototypes are visible in this header; the definitions live
 * elsewhere.
 * NOTE(review): the meaning of the 'int' return values and the required
 * size of 'police_idx' are not visible from here - confirm against the
 * implementation. */
int tc_parse_action_stats(struct nlattr *action,
                          struct ovs_flow_stats *stats_sw,
                          struct ovs_flow_stats *stats_hw,
                          struct ovs_flow_stats *stats_dropped);
int tc_dump_tc_action_start(char *name, struct nl_dump *dump);
int parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[]);
void nl_msg_put_act_tc_policy_flag(struct ofpbuf *request);
#endif /* tc.h */