2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-30 22:05:19 +00:00

tc: Add header rewrite using tc pedit action

To be later used to implement ovs action set offloading.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellacom>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
This commit is contained in:
Paul Blakey
2017-09-18 07:16:03 +03:00
committed by Simon Horman
parent 0aee382755
commit 8ada482bbe
2 changed files with 385 additions and 3 deletions

372
lib/tc.c
View File

@@ -21,8 +21,10 @@
#include <errno.h>
#include <linux/if_ether.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_csum.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <linux/tc_act/tc_pedit.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <linux/tc_act/tc_vlan.h>
#include <linux/gen_stats.h>
@@ -33,11 +35,14 @@
#include "netlink-socket.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/util.h"
#include "openvswitch/vlog.h"
#include "packets.h"
#include "timeval.h"
#include "unaligned.h"
#define MAX_PEDIT_OFFSETS 32
VLOG_DEFINE_THIS_MODULE(tc);
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
@@ -50,6 +55,82 @@ enum tc_offload_policy {
static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
struct tc_pedit_key_ex {
enum pedit_header_type htype;
enum pedit_cmd cmd;
};
struct flower_key_to_pedit {
enum pedit_header_type htype;
int flower_offset;
int offset;
int size;
};
static struct flower_key_to_pedit flower_pedit_map[] = {
{
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
12,
offsetof(struct tc_flower_key, ipv4.ipv4_src),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
16,
offsetof(struct tc_flower_key, ipv4.ipv4_dst),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
8,
offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
8,
offsetof(struct tc_flower_key, ipv6.ipv6_src),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
24,
offsetof(struct tc_flower_key, ipv6.ipv6_dst),
MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
6,
offsetof(struct tc_flower_key, src_mac),
MEMBER_SIZEOF(struct tc_flower_key, src_mac)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
0,
offsetof(struct tc_flower_key, dst_mac),
MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
12,
offsetof(struct tc_flower_key, eth_type),
MEMBER_SIZEOF(struct tc_flower_key, eth_type)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
0,
offsetof(struct tc_flower_key, tcp_src),
MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
2,
offsetof(struct tc_flower_key, tcp_dst),
MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
0,
offsetof(struct tc_flower_key, udp_src),
MEMBER_SIZEOF(struct tc_flower_key, udp_src)
}, {
TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
2,
offsetof(struct tc_flower_key, udp_dst),
MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
},
};
struct tcmsg *
tc_make_request(int ifindex, int type, unsigned int flags,
struct ofpbuf *request)
@@ -365,6 +446,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
}
}
static const struct nl_policy pedit_policy[] = {
[TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_pedit),
.optional = false, },
[TCA_PEDIT_KEYS_EX] = { .type = NL_A_NESTED,
.optional = false, },
};
static int
nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
{
struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
const struct tc_pedit *pe;
const struct tc_pedit_key *keys;
const struct nlattr *nla, *keys_ex, *ex_type;
const void *keys_attr;
char *rewrite_key = (void *) &flower->rewrite.key;
char *rewrite_mask = (void *) &flower->rewrite.mask;
size_t keys_ex_size, left;
int type, i = 0;
if (!nl_parse_nested(options, pedit_policy, pe_attrs,
ARRAY_SIZE(pedit_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse pedit action options");
return EPROTO;
}
pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
keys = pe->keys;
keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
keys_ex = nl_attr_get(keys_attr);
keys_ex_size = nl_attr_get_size(keys_attr);
NL_ATTR_FOR_EACH (nla, left, keys_ex, keys_ex_size) {
if (i >= pe->nkeys) {
break;
}
if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
VLOG_ERR_RL(&error_rl, "unable to parse legacy pedit type: %d",
nl_attr_type(nla));
return EOPNOTSUPP;
}
ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
type = nl_attr_get_u16(ex_type);
for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
struct flower_key_to_pedit *m = &flower_pedit_map[j];
int flower_off = m->flower_offset;
int sz = m->size;
int mf = m->offset;
if (m->htype != type) {
continue;
}
/* check overlap between current pedit key, which is always
* 4 bytes (range [off, off + 3]), and a map entry in
* flower_pedit_map (range [mf, mf + sz - 1]) */
if ((keys->off >= mf && keys->off < mf + sz)
|| (keys->off + 3 >= mf && keys->off + 3 < mf + sz)) {
int diff = flower_off + (keys->off - mf);
uint32_t *dst = (void *) (rewrite_key + diff);
uint32_t *dst_m = (void *) (rewrite_mask + diff);
uint32_t mask = ~(keys->mask);
uint32_t zero_bits;
if (keys->off < mf) {
zero_bits = 8 * (mf - keys->off);
mask &= UINT32_MAX << zero_bits;
} else if (keys->off + 4 > mf + m->size) {
zero_bits = 8 * (keys->off + 4 - mf - m->size);
mask &= UINT32_MAX >> zero_bits;
}
*dst_m |= mask;
*dst |= keys->val & mask;
}
}
keys++;
i++;
}
flower->rewrite.rewrite = true;
return 0;
}
static const struct nl_policy tunnel_key_policy[] = {
[TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct tc_tunnel_key),
@@ -608,6 +779,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
nl_parse_act_vlan(act_options, flower);
} else if (!strcmp(act_kind, "tunnel_key")) {
nl_parse_act_tunnel_key(act_options, flower);
} else if (!strcmp(act_kind, "pedit")) {
nl_parse_act_pedit(act_options, flower);
} else if (!strcmp(act_kind, "csum")) {
/* not doing anything for now, ovs has an implicit csum recalculation
* with rewriting of packet headers (translating of pedit acts). */
} else {
VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
return EINVAL;
@@ -808,6 +984,48 @@ tc_get_tc_cls_policy(enum tc_offload_policy policy)
return 0;
}
static void
nl_msg_put_act_csum(struct ofpbuf *request, uint32_t flags)
{
size_t offset;
nl_msg_put_string(request, TCA_ACT_KIND, "csum");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
struct tc_csum parm = { .action = TC_ACT_PIPE,
.update_flags = flags };
nl_msg_put_unspec(request, TCA_CSUM_PARMS, &parm, sizeof parm);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_pedit(struct ofpbuf *request, struct tc_pedit *parm,
struct tc_pedit_key_ex *ex)
{
size_t ksize = sizeof *parm + (parm->nkeys * sizeof(struct tc_pedit_key));
size_t offset, offset_keys_ex, offset_key;
int i;
nl_msg_put_string(request, TCA_ACT_KIND, "pedit");
offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
{
parm->action = TC_ACT_PIPE;
nl_msg_put_unspec(request, TCA_PEDIT_PARMS_EX, parm, ksize);
offset_keys_ex = nl_msg_start_nested(request, TCA_PEDIT_KEYS_EX);
for (i = 0; i < parm->nkeys; i++, ex++) {
offset_key = nl_msg_start_nested(request, TCA_PEDIT_KEY_EX);
nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_HTYPE, ex->htype);
nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_CMD, ex->cmd);
nl_msg_end_nested(request, offset_key);
}
nl_msg_end_nested(request, offset_keys_ex);
}
nl_msg_end_nested(request, offset);
}
static void
nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
{
@@ -930,7 +1148,127 @@ nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
}
}
/* Given flower, a key_to_pedit map entry, calculates the rest,
* where:
*
* mask, data - pointers of where read the first word of flower->key/mask.
* current_offset - which offset to use for the first pedit action.
* cnt - max pedits actions to use.
* first_word_mask/last_word_mask - the mask to use for the first/last read
* (as we read entire words). */
static void
calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m,
int *cur_offset, int *cnt, uint32_t *last_word_mask,
uint32_t *first_word_mask, uint32_t **mask, uint32_t **data)
{
int start_offset, max_offset, total_size;
int diff, right_zero_bits, left_zero_bits;
char *rewrite_key = (void *) &flower->rewrite.key;
char *rewrite_mask = (void *) &flower->rewrite.mask;
max_offset = m->offset + m->size;
start_offset = ROUND_DOWN(m->offset, 4);
diff = m->offset - start_offset;
total_size = max_offset - start_offset;
right_zero_bits = 8 * (4 - (max_offset % 4));
left_zero_bits = 8 * (m->offset - start_offset);
*cur_offset = start_offset;
*cnt = (total_size / 4) + (total_size % 4 ? 1 : 0);
*last_word_mask = UINT32_MAX >> right_zero_bits;
*first_word_mask = UINT32_MAX << left_zero_bits;
*data = (void *) (rewrite_key + m->flower_offset - diff);
*mask = (void *) (rewrite_mask + m->flower_offset - diff);
}
static inline void
csum_update_flag(struct tc_flower *flower,
enum pedit_header_type htype) {
if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4) {
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
}
if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
|| htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
|| htype == TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
|| htype == TCA_PEDIT_KEY_EX_HDR_TYPE_UDP) {
if (flower->key.ip_proto == IPPROTO_TCP) {
flower->mask.ip_proto = UINT8_MAX;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
} else if (flower->key.ip_proto == IPPROTO_UDP) {
flower->mask.ip_proto = UINT8_MAX;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
} else if (flower->key.ip_proto == IPPROTO_ICMP
|| flower->key.ip_proto == IPPROTO_ICMPV6) {
flower->mask.ip_proto = UINT8_MAX;
flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_ICMP;
}
}
}
static int
nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
struct tc_flower *flower)
{
struct {
struct tc_pedit sel;
struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
} sel = {
.sel = {
.nkeys = 0
}
};
int i, j;
for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
struct flower_key_to_pedit *m = &flower_pedit_map[i];
struct tc_pedit_key *pedit_key = NULL;
struct tc_pedit_key_ex *pedit_key_ex = NULL;
uint32_t *mask, *data, first_word_mask, last_word_mask;
int cnt = 0, cur_offset = 0;
if (!m->size) {
continue;
}
calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask,
&first_word_mask, &mask, &data);
for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) {
uint32_t mask_word = *mask;
if (j == 0) {
mask_word &= first_word_mask;
}
if (j == cnt - 1) {
mask_word &= last_word_mask;
}
if (!mask_word) {
continue;
}
if (sel.sel.nkeys == MAX_PEDIT_OFFSETS) {
VLOG_WARN_RL(&error_rl, "reached too many pedit offsets: %d",
MAX_PEDIT_OFFSETS);
return EOPNOTSUPP;
}
pedit_key = &sel.keys[sel.sel.nkeys];
pedit_key_ex = &sel.keys_ex[sel.sel.nkeys];
pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET;
pedit_key_ex->htype = m->htype;
pedit_key->off = cur_offset;
pedit_key->mask = ~mask_word;
pedit_key->val = *data & mask_word;
sel.sel.nkeys++;
csum_update_flag(flower, m->htype);
}
}
nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex);
return 0;
}
static int
nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
{
size_t offset;
@@ -939,7 +1277,20 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
{
uint16_t act_index = 1;
int error;
if (flower->rewrite.rewrite) {
act_offset = nl_msg_start_nested(request, act_index++);
error = nl_msg_put_flower_rewrite_pedits(request, flower);
if (error) {
return error;
}
nl_msg_end_nested(request, act_offset);
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_csum(request, flower->csum_update_flags);
nl_msg_end_nested(request, act_offset);
}
if (flower->set.set) {
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_tunnel_key_set(request, flower->set.id,
@@ -980,6 +1331,8 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
}
}
nl_msg_end_nested(request, offset);
return 0;
}
static void
@@ -1021,11 +1374,19 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
nl_msg_put_masked_value(request, type, type##_MASK, &flower->key.member, \
&flower->mask.member, sizeof flower->key.member)
static void
static int
nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
{
uint16_t host_eth_type = ntohs(flower->key.eth_type);
bool is_vlan = (host_eth_type == ETH_TYPE_VLAN);
int err;
/* need to parse acts first as some acts require changing the matching */
err = nl_msg_put_flower_acts(request, flower);
if (err) {
return err;
}
if (is_vlan) {
host_eth_type = ntohs(flower->key.encap_eth_type);
@@ -1083,7 +1444,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
nl_msg_put_flower_tunnel(request, flower);
}
nl_msg_put_flower_acts(request, flower);
return 0;
}
int
@@ -1106,7 +1467,12 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
nl_msg_put_string(&request, TCA_KIND, "flower");
basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
{
nl_msg_put_flower_options(&request, flower);
error = nl_msg_put_flower_options(&request, flower);
if (error) {
ofpbuf_uninit(&request);
return error;
}
}
nl_msg_end_nested(&request, basic_offset);

View File

@@ -96,6 +96,7 @@ struct tc_flower_key {
struct {
ovs_be32 ipv4_src;
ovs_be32 ipv4_dst;
uint8_t rewrite_ttl;
} ipv4;
struct {
struct in6_addr ipv6_src;
@@ -119,6 +120,14 @@ struct tc_flower {
struct ovs_flow_stats stats;
uint64_t lastused;
struct {
bool rewrite;
struct tc_flower_key key;
struct tc_flower_key mask;
} rewrite;
uint32_t csum_update_flags;
struct {
bool set;
ovs_be64 id;
@@ -152,6 +161,13 @@ struct tc_flower {
struct tc_cookie act_cookie;
};
/* assert that if we overflow with a masked write of uint32_t to the last byte
* of flower.rewrite we overflow inside struct flower.
* shouldn't happen unless someone moves rewrite to the end of flower */
BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
+ MEMBER_SIZEOF(struct tc_flower, rewrite)
+ sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
struct tc_flower *flower);
int tc_del_filter(int ifindex, int prio, int handle);