/* * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc. * Copyright (c) 2019 Mellanox Technologies, Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include "cmap.h" #include "dpif-netdev.h" #include "netdev-offload-provider.h" #include "netdev-provider.h" #include "openvswitch/match.h" #include "openvswitch/vlog.h" #include "packets.h" #include "uuid.h" VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5); /* Thread-safety * ============= * * Below API is NOT thread safe in following terms: * * - The caller must be sure that none of these functions will be called * simultaneously. Even for different 'netdev's. * * - The caller must be sure that 'netdev' will not be destructed/deallocated. * * - The caller must be sure that 'netdev' configuration will not be changed. * For example, simultaneous call of 'netdev_reconfigure()' for the same * 'netdev' is forbidden. * * For current implementation all above restrictions could be fulfilled by * taking the datapath 'port_mutex' in lib/dpif-netdev.c. */ /* * A mapping from ufid to dpdk rte_flow. */ static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; struct ufid_to_rte_flow_data { struct cmap_node node; ovs_u128 ufid; struct rte_flow *rte_flow; bool actions_offloaded; struct dpif_flow_stats stats; }; /* Find rte_flow with @ufid. */ static struct ufid_to_rte_flow_data * ufid_to_rte_flow_data_find(const ovs_u128 *ufid) { size_t hash = hash_bytes(ufid, sizeof *ufid, 0); struct ufid_to_rte_flow_data *data; CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { if (ovs_u128_equals(*ufid, data->ufid)) { return data; } } return NULL; } static inline void ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct rte_flow *rte_flow, bool actions_offloaded) { size_t hash = hash_bytes(ufid, sizeof *ufid, 0); struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data); struct ufid_to_rte_flow_data *data_prev; /* * We should not simply overwrite an existing rte flow. * We should have deleted it first before re-adding it. * Thus, if following assert triggers, something is wrong: * the rte_flow is not destroyed. */ data_prev = ufid_to_rte_flow_data_find(ufid); if (data_prev) { ovs_assert(data_prev->rte_flow == NULL); } data->ufid = *ufid; data->rte_flow = rte_flow; data->actions_offloaded = actions_offloaded; cmap_insert(&ufid_to_rte_flow, CONST_CAST(struct cmap_node *, &data->node), hash); } static inline void ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { size_t hash = hash_bytes(ufid, sizeof *ufid, 0); struct ufid_to_rte_flow_data *data; CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { if (ovs_u128_equals(*ufid, data->ufid)) { cmap_remove(&ufid_to_rte_flow, CONST_CAST(struct cmap_node *, &data->node), hash); ovsrcu_postpone(free, data); return; } } VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", UUID_ARGS((struct uuid *) ufid)); } /* * To avoid individual xrealloc calls for each new element, a 'curent_max' * is used to keep track of current allocated number of elements. Starts * by 8 and doubles on each xrealloc call. */ struct flow_patterns { struct rte_flow_item *items; int cnt; int current_max; }; struct flow_actions { struct rte_flow_action *actions; int cnt; int current_max; }; static void dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr) { ds_put_format(s, " Attributes: " "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n", attr->ingress, attr->egress, attr->priority, attr->group, attr->transfer); } static void dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) { if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { const struct rte_flow_item_eth *eth_spec = item->spec; const struct rte_flow_item_eth *eth_mask = item->mask; ds_put_cstr(s, "rte flow eth pattern:\n"); if (eth_spec) { ds_put_format(s, " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " "type=0x%04" PRIx16"\n", ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), ntohs(eth_spec->type)); } else { ds_put_cstr(s, " Spec = null\n"); } if (eth_mask) { ds_put_format(s, " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " "type=0x%04"PRIx16"\n", ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), ntohs(eth_mask->type)); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { const struct rte_flow_item_vlan *vlan_spec = item->spec; const struct rte_flow_item_vlan *vlan_mask = item->mask; ds_put_cstr(s, "rte flow vlan pattern:\n"); if (vlan_spec) { ds_put_format(s, " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); } else { ds_put_cstr(s, " Spec = null\n"); } if (vlan_mask) { ds_put_format(s, " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; ds_put_cstr(s, "rte flow ipv4 pattern:\n"); if (ipv4_spec) { ds_put_format(s, " Spec: tos=0x%"PRIx8", ttl=%"PRIx8 ", proto=0x%"PRIx8 ", src="IP_FMT", dst="IP_FMT"\n", ipv4_spec->hdr.type_of_service, ipv4_spec->hdr.time_to_live, ipv4_spec->hdr.next_proto_id, IP_ARGS(ipv4_spec->hdr.src_addr), IP_ARGS(ipv4_spec->hdr.dst_addr)); } else { ds_put_cstr(s, " Spec = null\n"); } if (ipv4_mask) { ds_put_format(s, " Mask: tos=0x%"PRIx8", ttl=%"PRIx8 ", proto=0x%"PRIx8 ", src="IP_FMT", dst="IP_FMT"\n", ipv4_mask->hdr.type_of_service, ipv4_mask->hdr.time_to_live, ipv4_mask->hdr.next_proto_id, IP_ARGS(ipv4_mask->hdr.src_addr), IP_ARGS(ipv4_mask->hdr.dst_addr)); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { const struct rte_flow_item_udp *udp_spec = item->spec; const struct rte_flow_item_udp *udp_mask = item->mask; ds_put_cstr(s, "rte flow udp pattern:\n"); if (udp_spec) { ds_put_format(s, " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", ntohs(udp_spec->hdr.src_port), ntohs(udp_spec->hdr.dst_port)); } else { ds_put_cstr(s, " Spec = null\n"); } if (udp_mask) { ds_put_format(s, " Mask: src_port=0x%"PRIx16 ", dst_port=0x%"PRIx16"\n", ntohs(udp_mask->hdr.src_port), ntohs(udp_mask->hdr.dst_port)); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { const struct rte_flow_item_sctp *sctp_spec = item->spec; const struct rte_flow_item_sctp *sctp_mask = item->mask; ds_put_cstr(s, "rte flow sctp pattern:\n"); if (sctp_spec) { ds_put_format(s, " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", ntohs(sctp_spec->hdr.src_port), ntohs(sctp_spec->hdr.dst_port)); } else { ds_put_cstr(s, " Spec = null\n"); } if (sctp_mask) { ds_put_format(s, " Mask: src_port=0x%"PRIx16 ", dst_port=0x%"PRIx16"\n", ntohs(sctp_mask->hdr.src_port), ntohs(sctp_mask->hdr.dst_port)); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { const struct rte_flow_item_icmp *icmp_spec = item->spec; const struct rte_flow_item_icmp *icmp_mask = item->mask; ds_put_cstr(s, "rte flow icmp pattern:\n"); if (icmp_spec) { ds_put_format(s, " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", icmp_spec->hdr.icmp_type, icmp_spec->hdr.icmp_code); } else { ds_put_cstr(s, " Spec = null\n"); } if (icmp_mask) { ds_put_format(s, " Mask: icmp_type=0x%"PRIx8 ", icmp_code=0x%"PRIx8"\n", icmp_spec->hdr.icmp_type, icmp_spec->hdr.icmp_code); } else { ds_put_cstr(s, " Mask = null\n"); } } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { const struct rte_flow_item_tcp *tcp_spec = item->spec; const struct rte_flow_item_tcp *tcp_mask = item->mask; ds_put_cstr(s, "rte flow tcp pattern:\n"); if (tcp_spec) { ds_put_format(s, " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", ntohs(tcp_spec->hdr.src_port), ntohs(tcp_spec->hdr.dst_port), tcp_spec->hdr.data_off, tcp_spec->hdr.tcp_flags); } else { ds_put_cstr(s, " Spec = null\n"); } if (tcp_mask) { ds_put_format(s, " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", ntohs(tcp_mask->hdr.src_port), ntohs(tcp_mask->hdr.dst_port), tcp_mask->hdr.data_off, tcp_mask->hdr.tcp_flags); } else { ds_put_cstr(s, " Mask = null\n"); } } else { ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type); } } static void dump_flow_action(struct ds *s, const struct rte_flow_action *actions) { if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { const struct rte_flow_action_mark *mark = actions->conf; ds_put_cstr(s, "rte flow mark action:\n"); if (mark) { ds_put_format(s, " Mark: id=%d\n", mark->id); } else { ds_put_cstr(s, " Mark = null\n"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { const struct rte_flow_action_rss *rss = actions->conf; ds_put_cstr(s, "rte flow RSS action:\n"); if (rss) { ds_put_format(s, " RSS: queue_num=%d\n", rss->queue_num); } else { ds_put_cstr(s, " RSS = null\n"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { const struct rte_flow_action_count *count = actions->conf; ds_put_cstr(s, "rte flow count action:\n"); if (count) { ds_put_format(s, " Count: shared=%d, id=%d\n", count->shared, count->id); } else { ds_put_cstr(s, " Count = null\n"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { const struct rte_flow_action_port_id *port_id = actions->conf; ds_put_cstr(s, "rte flow port-id action:\n"); if (port_id) { ds_put_format(s, " Port-id: original=%d, id=%d\n", port_id->original, port_id->id); } else { ds_put_cstr(s, " Port-id = null\n"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { ds_put_cstr(s, "rte flow drop action\n"); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) { const struct rte_flow_action_set_mac *set_mac = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST ? "dst" : "src"; ds_put_format(s, "rte flow set-mac-%s action:\n", dirstr); if (set_mac) { ds_put_format(s, " Set-mac-%s: "ETH_ADDR_FMT"\n", dirstr, ETH_ADDR_BYTES_ARGS(set_mac->mac_addr)); } else { ds_put_format(s, " Set-mac-%s = null\n", dirstr); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) { const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST ? "dst" : "src"; ds_put_format(s, "rte flow set-ipv4-%s action:\n", dirstr); if (set_ipv4) { ds_put_format(s, " Set-ipv4-%s: "IP_FMT"\n", dirstr, IP_ARGS(set_ipv4->ipv4_addr)); } else { ds_put_format(s, " Set-ipv4-%s = null\n", dirstr); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) { const struct rte_flow_action_set_ttl *set_ttl = actions->conf; ds_put_cstr(s, "rte flow set-ttl action:\n"); if (set_ttl) { ds_put_format(s, " Set-ttl: %d\n", set_ttl->ttl_value); } else { ds_put_cstr(s, " Set-ttl = null\n"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) { const struct rte_flow_action_set_tp *set_tp = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? "dst" : "src"; ds_put_format(s, "rte flow set-tcp/udp-port-%s action:\n", dirstr); if (set_tp) { ds_put_format(s, " Set-%s-tcp/udp-port: %"PRIu16"\n", dirstr, ntohs(set_tp->port)); } else { ds_put_format(s, " Set-%s-tcp/udp-port = null\n", dirstr); } } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } } static struct ds * dump_flow(struct ds *s, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions) { if (attr) { dump_flow_attr(s, attr); } while (items && items->type != RTE_FLOW_ITEM_TYPE_END) { dump_flow_pattern(s, items++); } while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) { dump_flow_action(s, actions++); } return s; } static struct rte_flow * netdev_offload_dpdk_flow_create(struct netdev *netdev, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions, struct rte_flow_error *error) { struct rte_flow *flow; struct ds s; flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error); if (flow) { if (!VLOG_DROP_DBG(&rl)) { ds_init(&s); dump_flow(&s, attr, items, actions); VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" created:\n%s", netdev_get_name(netdev), (intptr_t) flow, ds_cstr(&s)); ds_destroy(&s); } } else { enum vlog_level level = VLL_WARN; if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) { level = VLL_DBG; } VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).", netdev_get_name(netdev), error->type, error->message); if (!vlog_should_drop(&this_module, level, &rl)) { ds_init(&s); dump_flow(&s, attr, items, actions); VLOG_RL(&rl, level, "Failed flow:\n%s", ds_cstr(&s)); ds_destroy(&s); } } return flow; } static void add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, const void *spec, const void *mask) { int cnt = patterns->cnt; if (cnt == 0) { patterns->current_max = 8; patterns->items = xcalloc(patterns->current_max, sizeof *patterns->items); } else if (cnt == patterns->current_max) { patterns->current_max *= 2; patterns->items = xrealloc(patterns->items, patterns->current_max * sizeof *patterns->items); } patterns->items[cnt].type = type; patterns->items[cnt].spec = spec; patterns->items[cnt].mask = mask; patterns->items[cnt].last = NULL; patterns->cnt++; } static void add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, const void *conf) { int cnt = actions->cnt; if (cnt == 0) { actions->current_max = 8; actions->actions = xcalloc(actions->current_max, sizeof *actions->actions); } else if (cnt == actions->current_max) { actions->current_max *= 2; actions->actions = xrealloc(actions->actions, actions->current_max * sizeof *actions->actions); } actions->actions[cnt].type = type; actions->actions[cnt].conf = conf; actions->cnt++; } static void free_flow_patterns(struct flow_patterns *patterns) { int i; for (i = 0; i < patterns->cnt; i++) { if (patterns->items[i].spec) { free(CONST_CAST(void *, patterns->items[i].spec)); } if (patterns->items[i].mask) { free(CONST_CAST(void *, patterns->items[i].mask)); } } free(patterns->items); patterns->items = NULL; patterns->cnt = 0; } static void free_flow_actions(struct flow_actions *actions) { int i; for (i = 0; i < actions->cnt; i++) { if (actions->actions[i].conf) { free(CONST_CAST(void *, actions->actions[i].conf)); } } free(actions->actions); actions->actions = NULL; actions->cnt = 0; } static int parse_flow_match(struct flow_patterns *patterns, const struct match *match) { uint8_t *next_proto_mask = NULL; uint8_t proto = 0; /* Eth */ if (!eth_addr_is_zero(match->wc.masks.dl_src) || !eth_addr_is_zero(match->wc.masks.dl_dst)) { struct rte_flow_item_eth *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst); memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src); spec->type = match->flow.dl_type; memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst); memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src); mask->type = match->wc.masks.dl_type; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask); } else { /* * If user specifies a flow (like UDP flow) without L2 patterns, * OVS will at least set the dl_type. Normally, it's enough to * create an eth pattern just with it. Unluckily, some Intel's * NIC (such as XL710) doesn't support that. Below is a workaround, * which simply matches any L2 pkts. */ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); } /* VLAN */ if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { struct rte_flow_item_vlan *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); /* Match any protocols. */ mask->inner_type = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask); } /* IP v4 */ if (match->flow.dl_type == htons(ETH_TYPE_IP)) { struct rte_flow_item_ipv4 *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.type_of_service = match->flow.nw_tos; spec->hdr.time_to_live = match->flow.nw_ttl; spec->hdr.next_proto_id = match->flow.nw_proto; spec->hdr.src_addr = match->flow.nw_src; spec->hdr.dst_addr = match->flow.nw_dst; mask->hdr.type_of_service = match->wc.masks.nw_tos; mask->hdr.time_to_live = match->wc.masks.nw_ttl; mask->hdr.next_proto_id = match->wc.masks.nw_proto; mask->hdr.src_addr = match->wc.masks.nw_src; mask->hdr.dst_addr = match->wc.masks.nw_dst; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask); /* Save proto for L4 protocol setup. */ proto = spec->hdr.next_proto_id & mask->hdr.next_proto_id; next_proto_mask = &mask->hdr.next_proto_id; } if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && proto != IPPROTO_SCTP && proto != IPPROTO_TCP && (match->wc.masks.tp_src || match->wc.masks.tp_dst || match->wc.masks.tcp_flags)) { VLOG_DBG("L4 Protocol (%u) not supported", proto); return -1; } if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { return -1; } if (proto == IPPROTO_TCP) { struct rte_flow_item_tcp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask); /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_UDP) { struct rte_flow_item_udp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask); /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_SCTP) { struct rte_flow_item_sctp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask); /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_ICMP) { struct rte_flow_item_icmp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src); spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst); mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src); mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst); add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask); /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); return 0; } static void add_flow_mark_rss_actions(struct flow_actions *actions, uint32_t flow_mark, const struct netdev *netdev) { struct rte_flow_action_mark *mark; struct action_rss_data { struct rte_flow_action_rss conf; uint16_t queue[0]; } *rss_data; BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0); int i; mark = xzalloc(sizeof *mark); mark->id = flow_mark; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark); rss_data = xmalloc(sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0]); *rss_data = (struct action_rss_data) { .conf = (struct rte_flow_action_rss) { .func = RTE_ETH_HASH_FUNCTION_DEFAULT, .level = 0, .types = 0, .queue_num = netdev_n_rxq(netdev), .queue = rss_data->queue, .key_len = 0, .key = NULL }, }; /* Override queue array with default. */ for (i = 0; i < netdev_n_rxq(netdev); i++) { rss_data->queue[i] = i; } add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); } static struct rte_flow * netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns, struct netdev *netdev, uint32_t flow_mark) { struct flow_actions actions = { .actions = NULL, .cnt = 0 }; const struct rte_flow_attr flow_attr = { .group = 0, .priority = 0, .ingress = 1, .egress = 0 }; struct rte_flow_error error; struct rte_flow *flow; add_flow_mark_rss_actions(&actions, flow_mark, netdev); flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items, actions.actions, &error); free_flow_actions(&actions); return flow; } static void add_count_action(struct flow_actions *actions) { struct rte_flow_action_count *count = xzalloc(sizeof *count); add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); } static int add_port_id_action(struct flow_actions *actions, struct netdev *outdev) { struct rte_flow_action_port_id *port_id; int outdev_id; outdev_id = netdev_dpdk_get_port_id(outdev); if (outdev_id < 0) { return -1; } port_id = xzalloc(sizeof *port_id); port_id->id = outdev_id; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); return 0; } static int add_output_action(struct netdev *netdev, struct flow_actions *actions, const struct nlattr *nla, struct offload_info *info) { struct netdev *outdev; odp_port_t port; int ret = 0; port = nl_attr_get_odp_port(nla); outdev = netdev_ports_get(port, info->dpif_class); if (outdev == NULL) { VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port); return -1; } if (!netdev_flow_api_equals(netdev, outdev) || add_port_id_action(actions, outdev)) { VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.", netdev_get_name(netdev), netdev_get_name(outdev)); ret = -1; } netdev_close(outdev); return ret; } static int add_set_flow_action__(struct flow_actions *actions, const void *value, void *mask, const size_t size, const int attr) { void *spec; if (mask) { /* DPDK does not support partially masked set actions. In such * case, fail the offload. */ if (is_all_zeros(mask, size)) { return 0; } if (!is_all_ones(mask, size)) { VLOG_DBG_RL(&rl, "Partial mask is not supported"); return -1; } } spec = xzalloc(size); memcpy(spec, value, size); add_flow_action(actions, attr, spec); /* Clear used mask for later checking. */ if (mask) { memset(mask, 0, size); } return 0; } BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) == MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) == MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_udp, udp_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_udp, udp_dst)); static int parse_set_actions(struct flow_actions *actions, const struct nlattr *set_actions, const size_t set_actions_len, bool masked) { const struct nlattr *sa; unsigned int sleft; #define add_set_flow_action(field, type) \ if (add_set_flow_action__(actions, &key->field, \ mask ? CONST_CAST(void *, &mask->field) : NULL, \ sizeof key->field, type)) { \ return -1; \ } NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) { if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) { const struct ovs_key_ethernet *key = nl_attr_get(sa); const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL; add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC); add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) { const struct ovs_key_ipv4 *key = nl_attr_get(sa); const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL; add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC); add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST); add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported IPv4 set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) { const struct ovs_key_tcp *key = nl_attr_get(sa); const struct ovs_key_tcp *mask = masked ? key + 1 : NULL; add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC); add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported TCP set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) { const struct ovs_key_udp *key = nl_attr_get(sa); const struct ovs_key_udp *mask = masked ? key + 1 : NULL; add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC); add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported UDP set action"); return -1; } } else { VLOG_DBG_RL(&rl, "Unsupported set action type %d", nl_attr_type(sa)); return -1; } } return 0; } static int parse_flow_actions(struct netdev *netdev, struct flow_actions *actions, struct nlattr *nl_actions, size_t nl_actions_len, struct offload_info *info) { struct nlattr *nla; size_t left; add_count_action(actions); NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { if (add_output_action(netdev, actions, nla, info)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) { add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL); } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET || nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) { const struct nlattr *set_actions = nl_attr_get(nla); const size_t set_actions_len = nl_attr_get_size(nla); bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED; if (parse_set_actions(actions, set_actions, set_actions_len, masked)) { return -1; } } else { VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla)); return -1; } } if (nl_actions_len == 0) { VLOG_DBG_RL(&rl, "No actions provided"); return -1; } add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); return 0; } static struct rte_flow * netdev_offload_dpdk_actions(struct netdev *netdev, struct flow_patterns *patterns, struct nlattr *nl_actions, size_t actions_len, struct offload_info *info) { const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 }; struct flow_actions actions = { .actions = NULL, .cnt = 0 }; struct rte_flow *flow = NULL; struct rte_flow_error error; int ret; ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len, info); if (ret) { goto out; } flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items, actions.actions, &error); out: free_flow_actions(&actions); return flow; } static int netdev_offload_dpdk_add_flow(struct netdev *netdev, const struct match *match, struct nlattr *nl_actions, size_t actions_len, const ovs_u128 *ufid, struct offload_info *info) { struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; bool actions_offloaded = true; struct rte_flow *flow; int ret = 0; ret = parse_flow_match(&patterns, match); if (ret) { goto out; } flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions, actions_len, info); if (!flow) { /* If we failed to offload the rule actions fallback to MARK+RSS * actions. */ flow = netdev_offload_dpdk_mark_rss(&patterns, netdev, info->flow_mark); actions_offloaded = false; } if (!flow) { ret = -1; goto out; } ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); out: free_flow_patterns(&patterns); return ret; } /* * Check if any unsupported flow patterns are specified. */ static int netdev_offload_dpdk_validate_flow(const struct match *match) { struct match match_zero_wc; const struct flow *masks = &match->wc.masks; /* Create a wc-zeroed version of flow. */ match_init(&match_zero_wc, &match->flow, &match->wc); if (!is_all_zeros(&match_zero_wc.flow.tunnel, sizeof match_zero_wc.flow.tunnel)) { goto err; } if (masks->metadata || masks->skb_priority || masks->pkt_mark || masks->dp_hash) { goto err; } /* recirc id must be zero. */ if (match_zero_wc.flow.recirc_id) { goto err; } if (masks->ct_state || masks->ct_nw_proto || masks->ct_zone || masks->ct_mark || !ovs_u128_is_zero(masks->ct_label)) { goto err; } if (masks->conj_id || masks->actset_output) { goto err; } /* Unsupported L2. */ if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { goto err; } /* Unsupported L3. */ if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || !is_all_zeros(&masks->nsh, sizeof masks->nsh) || !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { goto err; } /* If fragmented, then don't HW accelerate - for now. */ if (match_zero_wc.flow.nw_frag) { goto err; } /* Unsupported L4. */ if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { goto err; } return 0; err: VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); return -1; } static int netdev_offload_dpdk_destroy_flow(struct netdev *netdev, const ovs_u128 *ufid, struct rte_flow *rte_flow) { struct rte_flow_error error; int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); if (ret == 0) { ufid_to_rte_flow_disassociate(ufid); VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", netdev_get_name(netdev), rte_flow, UUID_ARGS((struct uuid *)ufid)); } else { VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n", netdev_get_name(netdev), error.message, error.type); } return ret; } static int netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, struct nlattr *actions, size_t actions_len, const ovs_u128 *ufid, struct offload_info *info, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; int ret; /* * If an old rte_flow exists, it means it's a flow modification. * Here destroy the old rte flow first before adding a new one. */ rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (rte_flow_data && rte_flow_data->rte_flow) { ret = netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow_data->rte_flow); if (ret < 0) { return ret; } } ret = netdev_offload_dpdk_validate_flow(match); if (ret < 0) { return ret; } if (stats) { memset(stats, 0, sizeof *stats); } return netdev_offload_dpdk_add_flow(netdev, match, actions, actions_len, ufid, info); } static int netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (!rte_flow_data || !rte_flow_data->rte_flow) { return -1; } if (stats) { memset(stats, 0, sizeof *stats); } return netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow_data->rte_flow); } static int netdev_offload_dpdk_init_flow_api(struct netdev *netdev) { return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP; } static int netdev_offload_dpdk_flow_get(struct netdev *netdev, struct match *match OVS_UNUSED, struct nlattr **actions OVS_UNUSED, const ovs_u128 *ufid, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs, struct ofpbuf *buf OVS_UNUSED) { struct rte_flow_query_count query = { .reset = 1 }; struct ufid_to_rte_flow_data *rte_flow_data; struct rte_flow_error error; int ret = 0; rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (!rte_flow_data || !rte_flow_data->rte_flow) { ret = -1; goto out; } attrs->offloaded = true; if (!rte_flow_data->actions_offloaded) { attrs->dp_layer = "ovs"; memset(stats, 0, sizeof *stats); goto out; } attrs->dp_layer = "dpdk"; ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow, &query, &error); if (ret) { VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p\n", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid), rte_flow_data->rte_flow); goto out; } rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0; rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0; if (query.hits_set && query.hits) { rte_flow_data->stats.used = time_msec(); } memcpy(stats, &rte_flow_data->stats, sizeof *stats); out: return ret; } const struct netdev_flow_api netdev_offload_dpdk = { .type = "dpdk_flow_api", .flow_put = netdev_offload_dpdk_flow_put, .flow_del = netdev_offload_dpdk_flow_del, .init_flow_api = netdev_offload_dpdk_init_flow_api, .flow_get = netdev_offload_dpdk_flow_get, };