mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 01:51:26 +00:00
netdev-offload-tc: Only install recirc flows if the parent is present.
When flows are added through TC, only the match and actions are verified to determine if they can be handled by TC. If they can, the TC flow is installed. However, when the flow is a continuation of a previously recirculated flow, it can happen that the flow performing the recirculation is installed in the kernel. This may occur, for example, if it includes an action that cannot be handled by TC. If the kernel module has the first flow but not the second one (missing because it is programmed in TC), the flow is sent to userspace via an upcall. This patch tracks which recirculation goto actions are handled by TC. A matching TC rule is installed only if the corresponding recirculation ID is confirmed to be handled by TC. Acked-by: Simon Horman <horms@ovn.org> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
This commit is contained in:
parent
0fb370bdcd
commit
5292eb50ae
@ -19,6 +19,7 @@
|
||||
#include <errno.h>
|
||||
#include <linux/if_ether.h>
|
||||
|
||||
#include "ccmap.h"
|
||||
#include "dpif.h"
|
||||
#include "hash.h"
|
||||
#include "id-pool.h"
|
||||
@ -78,6 +79,10 @@ struct policer_node {
|
||||
uint32_t police_idx;
|
||||
};
|
||||
|
||||
/* ccmap and protective mutex for counting recirculation id (chain) usage. */
|
||||
static struct ovs_mutex used_chains_mutex = OVS_MUTEX_INITIALIZER;
|
||||
static struct ccmap used_chains OVS_GUARDED;
|
||||
|
||||
/* Protects below meter police ids pool. */
|
||||
static struct ovs_mutex meter_police_ids_mutex = OVS_MUTEX_INITIALIZER;
|
||||
static struct id_pool *meter_police_ids OVS_GUARDED_BY(meter_police_ids_mutex);
|
||||
@ -204,6 +209,10 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
|
||||
* @adjust_stats: When flow gets updated with new actions, we need to adjust
|
||||
* the reported stats to include previous values as the hardware
|
||||
* rule is removed and re-added. This stats copy is used for it.
|
||||
* @chain_goto: If a TC jump action exists for the flow, the target chain it
|
||||
* jumps to is stored here. Only a single goto action is stored,
|
||||
* as TC supports only one goto action per flow (there is no
|
||||
* return mechanism).
|
||||
*/
|
||||
struct ufid_tc_data {
|
||||
struct hmap_node ufid_to_tc_node;
|
||||
@ -212,6 +221,7 @@ struct ufid_tc_data {
|
||||
struct tcf_id id;
|
||||
struct netdev *netdev;
|
||||
struct dpif_flow_stats adjust_stats;
|
||||
uint32_t chain_goto;
|
||||
};
|
||||
|
||||
static void
|
||||
@ -233,6 +243,13 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
|
||||
hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
|
||||
hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
|
||||
netdev_close(data->netdev);
|
||||
|
||||
if (data->chain_goto) {
|
||||
ovs_mutex_lock(&used_chains_mutex);
|
||||
ccmap_dec(&used_chains, data->chain_goto);
|
||||
ovs_mutex_unlock(&used_chains_mutex);
|
||||
}
|
||||
|
||||
free(data);
|
||||
}
|
||||
|
||||
@ -288,7 +305,8 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
|
||||
/* Add ufid entry to ufid_to_tc hashmap. */
|
||||
static void
|
||||
add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
|
||||
struct tcf_id *id, struct dpif_flow_stats *stats)
|
||||
struct tcf_id *id, struct dpif_flow_stats *stats,
|
||||
uint32_t chain_goto)
|
||||
{
|
||||
struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
|
||||
size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
|
||||
@ -300,6 +318,8 @@ add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
|
||||
new_data->ufid = *ufid;
|
||||
new_data->id = *id;
|
||||
new_data->netdev = netdev_ref(netdev);
|
||||
new_data->chain_goto = chain_goto;
|
||||
|
||||
if (stats) {
|
||||
new_data->adjust_stats = *stats;
|
||||
}
|
||||
@ -2315,6 +2335,17 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
|
||||
chain = key->recirc_id;
|
||||
mask->recirc_id = 0;
|
||||
|
||||
if (chain) {
|
||||
/* If we match on a recirculation ID, we must ensure the previous
|
||||
* flow is also in the TC datapath; otherwise, the entry is useless,
|
||||
* as the related packets will be handled by upcalls. */
|
||||
if (!ccmap_find(&used_chains, chain)) {
|
||||
VLOG_DBG_RL(&rl, "match for chain %u failed due to non-existing "
|
||||
"goto chain action", chain);
|
||||
return EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
if (flow_tnl_dst_is_set(&key->tunnel) ||
|
||||
flow_tnl_src_is_set(&key->tunnel)) {
|
||||
VLOG_DBG_RL(&rl,
|
||||
@ -2657,11 +2688,28 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
|
||||
id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
|
||||
err = tc_replace_flower(&id, &flower);
|
||||
if (!err) {
|
||||
uint32_t chain_goto = 0;
|
||||
|
||||
if (stats) {
|
||||
memset(stats, 0, sizeof *stats);
|
||||
netdev_tc_adjust_stats(stats, &adjust_stats);
|
||||
}
|
||||
add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
|
||||
|
||||
if (recirc_act) {
|
||||
struct tc_action *action = flower.actions;
|
||||
|
||||
for (int i = 0; i < flower.action_count; i++, action++) {
|
||||
if (action->type == TC_ACT_GOTO && action->chain) {
|
||||
chain_goto = action->chain;
|
||||
ovs_mutex_lock(&used_chains_mutex);
|
||||
ccmap_inc(&used_chains, chain_goto);
|
||||
ovs_mutex_unlock(&used_chains_mutex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats, chain_goto);
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -3138,6 +3186,8 @@ netdev_tc_init_flow_api(struct netdev *netdev)
|
||||
tc_add_del_qdisc(ifindex, false, 0, hook);
|
||||
|
||||
if (ovsthread_once_start(&once)) {
|
||||
ccmap_init(&used_chains);
|
||||
|
||||
probe_tc_block_support(ifindex);
|
||||
/* Need to re-fetch block id as it depends on feature availability. */
|
||||
block_id = get_block_id_from_netdev(netdev);
|
||||
|
@ -1047,3 +1047,122 @@ AT_CHECK([ovs-appctl dpctl/dump-flows type=ovs | grep "eth_type(0x0800)" | DUMP_
|
||||
|
||||
OVS_TRAFFIC_VSWITCHD_STOP
|
||||
AT_CLEANUP
|
||||
|
||||
AT_SETUP([offloads - split kernel vs offload datapath rules])
|
||||
OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
|
||||
|
||||
ADD_NAMESPACES(at_ns0, at_ns1)
|
||||
|
||||
ADD_VETH(p1, at_ns0, br0, "10.0.0.1/24")
|
||||
ADD_VETH(p2, at_ns1, br0, "10.0.0.2/24")
|
||||
|
||||
AT_CHECK([ovs-vsctl -- set interface ovs-p1 ofport_request=1 \
|
||||
-- set interface ovs-p2 ofport_request=2])
|
||||
|
||||
AT_DATA([groups.txt], [dnl
|
||||
group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=2)
|
||||
])
|
||||
AT_DATA([flows.txt], [dnl
|
||||
table=0,arp,actions=NORMAL
|
||||
table=0,priority=100,cookie=0x12345678,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=resubmit(,1)
|
||||
table=0,priority=100,cookie=0xabcedf,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ct(table=3)
|
||||
table=1,priority=200,ip,actions=group:1
|
||||
table=2,ip,actions=ovs-p2
|
||||
table=3,ip,actions=ovs-p1
|
||||
])
|
||||
AT_CHECK([ovs-ofctl add-groups br0 groups.txt])
|
||||
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
|
||||
|
||||
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
|
||||
3 packets transmitted, 3 received, 0% packet loss, time 0ms
|
||||
])
|
||||
|
||||
AT_CHECK([ovs-appctl revalidator/wait])
|
||||
|
||||
dnl In this test we should not have the first recirculation(s) in the kernel
|
||||
dnl datapath, and the final flow in TC. They should all be in the kernel
|
||||
dnl datapath, as the dp_hash() action is currently not supported by TC.
|
||||
dnl The command below ensures they are all handled in the kernel datapath.
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:2, bytes:196, used:0.001s, actions:hash(l4(0)),recirc(<recirc>)
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:196, used:0.001s, actions:ct(commit),recirc(<recirc>)
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:196, used:0.001s, actions:ovs-p2
|
||||
])
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
])
|
||||
|
||||
dnl The next test will install two DP flows that are both accepted in the
|
||||
dnl TC datapath. But then the first one is moved to the kernel datapath,
|
||||
dnl we have to make sure both flows are moved to kernel (and no dual rules
|
||||
dnl exist).
|
||||
|
||||
AT_DATA([flows.txt], [dnl
|
||||
table=0,arp,actions=NORMAL
|
||||
table=0,priority=100,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=ct(table=1)
|
||||
table=0,priority=100,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ovs-p1
|
||||
table=1,ip,actions=ovs-p2
|
||||
])
|
||||
|
||||
AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=2000])
|
||||
AT_CHECK([ovs-ofctl del-flows br0])
|
||||
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
|
||||
AT_CHECK([ovs-appctl revalidator/purge])
|
||||
|
||||
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
|
||||
3 packets transmitted, 3 received, 0% packet loss, time 0ms
|
||||
])
|
||||
|
||||
AT_CHECK([ovs-appctl revalidator/wait])
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:2, bytes:168, used:0.001s, actions:ct,recirc(<recirc>)
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:168, used:0.001s, actions:ovs-p2
|
||||
])
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
])
|
||||
|
||||
AT_DATA([flows.txt], [dnl
|
||||
table=0,priority=100,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=controller(),ct(table=1)
|
||||
])
|
||||
|
||||
AT_CHECK([ovs-ofctl del-flows br0 table=0,in_port=ovs-p1,ip,nw_dst=10.0.0.2])
|
||||
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
|
||||
AT_CHECK([ovs-appctl revalidator/wait])
|
||||
|
||||
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
|
||||
3 packets transmitted, 3 received, 0% packet loss, time 0ms
|
||||
])
|
||||
|
||||
AT_CHECK([ovs-appctl revalidator/wait])
|
||||
|
||||
dnl Unfortunately, until the unreachable TC rule times out, packets will be
|
||||
dnl handled by upcalls. So we first check for this case and then verify that
|
||||
dnl the rule finally moved.
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=tc filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:168, used:0.001s, actions:ovs-p2
|
||||
])
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
|
||||
strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
|
||||
recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:3, bytes:294, used:0.001s, actions:userspace(pid=4294967295,controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535)),ct,recirc(<recirc>)
|
||||
])
|
||||
|
||||
check_rules_and_ping() {
|
||||
NS_CHECK_EXEC([at_ns0], [ping -q -c 1 -i 0.1 -W 2 10.0.0.2], [0], [ignore])
|
||||
AT_CHECK([ovs-appctl revalidator/wait])
|
||||
AT_CHECK([ovs-appctl dpctl/dump-flows -m filter='in_port(ovs-p1),ipv4'],
|
||||
[0], [stdout])
|
||||
|
||||
[[ "$(wc -l < stdout)" -ne 2 ]] && return 0
|
||||
grep -q "dp:tc" stdout
|
||||
}
|
||||
|
||||
OVS_WAIT_WHILE(
|
||||
[check_rules_and_ping],
|
||||
[ovs-appctl dpctl/dump-flows -m --names filter='in_port(ovs-p1),ipv4'])
|
||||
|
||||
OVS_TRAFFIC_VSWITCHD_STOP
|
||||
AT_CLEANUP
|
||||
|
Loading…
x
Reference in New Issue
Block a user