2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00
ovs/lib/dpif-netdev-private-extract.c

376 lines
13 KiB
C
Raw Normal View History

/*
* Copyright (c) 2021 Intel.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include "dp-packet.h"
#include "dpif-netdev-private-dpcls.h"
#include "dpif-netdev-private-extract.h"
#include "dpif-netdev-private-thread.h"
#include "flow.h"
#include "openvswitch/vlog.h"
#include "ovs-thread.h"
#include "util.h"
VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
/* Variable to hold the default MFEX implementation.
 *
 * It is written by dpif_miniflow_extract_init() and
 * dp_mfex_impl_set_default_by_name(), and read by
 * dp_mfex_impl_get_default().  All three go through an 'atomic_uintptr_t *'
 * view of this variable and use relaxed atomic operations. */
static ATOMIC(miniflow_extract_func) default_mfex_func;
/* Table of the miniflow extract (MFEX) implementations, indexed by the
 * MFEX_IMPL_* constants.  The implementations are always in order of
 * preference.
 *
 * How the fields are used in this file:
 *   - 'probe' is called once per entry by dpif_miniflow_extract_init(); a
 *     return value of zero marks the entry as available.  An entry whose
 *     'probe' is NULL is marked available without any check.
 *   - 'extract_func' is the function pointer handed out by
 *     dp_mfex_impl_get_by_name().  It is NULL for the "scalar" entry.
 *   - 'name' is the string matched by dp_mfex_impl_get_by_name() and printed
 *     by dp_mfex_impl_get().
 */
static struct dpif_miniflow_extract_impl mfex_impls[] = {
[MFEX_IMPL_AUTOVALIDATOR] = {
.probe = NULL,
.extract_func = dpif_miniflow_extract_autovalidator,
.name = "autovalidator", },
[MFEX_IMPL_SCALAR] = {
.probe = NULL,
.extract_func = NULL,
.name = "scalar", },
[MFEX_IMPL_STUDY] = {
.probe = NULL,
.extract_func = mfex_study_traffic,
.name = "study", },
/* Compile in implementations only if the compiler ISA checks pass. */
#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
[MFEX_IMPL_VMBI_IPv4_UDP] = {
.probe = mfex_avx512_vbmi_probe,
.extract_func = mfex_avx512_vbmi_ip_udp,
.name = "avx512_vbmi_ipv4_udp", },
[MFEX_IMPL_IPv4_UDP] = {
.probe = mfex_avx512_probe,
.extract_func = mfex_avx512_ip_udp,
.name = "avx512_ipv4_udp", },
[MFEX_IMPL_VMBI_IPv4_TCP] = {
.probe = mfex_avx512_vbmi_probe,
.extract_func = mfex_avx512_vbmi_ip_tcp,
.name = "avx512_vbmi_ipv4_tcp", },
[MFEX_IMPL_IPv4_TCP] = {
.probe = mfex_avx512_probe,
.extract_func = mfex_avx512_ip_tcp,
.name = "avx512_ipv4_tcp", },
[MFEX_IMPL_VMBI_DOT1Q_IPv4_UDP] = {
.probe = mfex_avx512_vbmi_probe,
.extract_func = mfex_avx512_vbmi_dot1q_ip_udp,
.name = "avx512_vbmi_dot1q_ipv4_udp", },
[MFEX_IMPL_DOT1Q_IPv4_UDP] = {
.probe = mfex_avx512_probe,
.extract_func = mfex_avx512_dot1q_ip_udp,
.name = "avx512_dot1q_ipv4_udp", },
[MFEX_IMPL_VMBI_DOT1Q_IPv4_TCP] = {
.probe = mfex_avx512_vbmi_probe,
.extract_func = mfex_avx512_vbmi_dot1q_ip_tcp,
.name = "avx512_vbmi_dot1q_ipv4_tcp", },
[MFEX_IMPL_DOT1Q_IPv4_TCP] = {
.probe = mfex_avx512_probe,
.extract_func = mfex_avx512_dot1q_ip_tcp,
.name = "avx512_dot1q_ipv4_tcp", },
#endif
};
/* Build-time check that the table's element count equals MFEX_IMPL_MAX, which
 * the loops in this file use as the iteration bound over 'mfex_impls'. */
BUILD_ASSERT_DECL(MFEX_IMPL_MAX == ARRAY_SIZE(mfex_impls));
void
dpif_miniflow_extract_init(void)
{
atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
#ifdef MFEX_AUTOVALIDATOR_DEFAULT
int mfex_idx = MFEX_IMPL_AUTOVALIDATOR;
#else
int mfex_idx = MFEX_IMPL_SCALAR;
#endif
/* Call probe on each impl, and cache the result. */
for (int i = 0; i < MFEX_IMPL_MAX; i++) {
bool avail = true;
if (mfex_impls[i].probe) {
/* Return zero is success, non-zero means error. */
avail = (mfex_impls[i].probe() == 0);
}
VLOG_DBG("Miniflow Extract implementation '%s' %s available.",
mfex_impls[i].name, avail ? "is" : "is not");
mfex_impls[i].available = avail;
}
/* For the first call, this will be choosen based on the
* compile time flag.
*/
VLOG_INFO("Default MFEX Extract implementation is %s.\n",
mfex_impls[mfex_idx].name);
atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls
[mfex_idx].extract_func);
}
miniflow_extract_func
dp_mfex_impl_get_default(void)
{
miniflow_extract_func return_func;
atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
atomic_read_relaxed(mfex_func, (uintptr_t *) &return_func);
return return_func;
}
int
dp_mfex_impl_set_default_by_name(const char *name)
{
miniflow_extract_func new_default;
atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
int err = dp_mfex_impl_get_by_name(name, &new_default);
if (!err) {
atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
}
return err;
}
void
dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
size_t pmd_list_size)
{
/* Add all MFEX functions to reply string. */
ds_put_cstr(reply, "Available MFEX implementations:\n");
for (int i = 0; i < MFEX_IMPL_MAX; i++) {
ds_put_format(reply, " %s (available: %s pmds: ",
mfex_impls[i].name, mfex_impls[i].available ?
"True" : "False");
for (size_t j = 0; j < pmd_list_size; j++) {
struct dp_netdev_pmd_thread *pmd = pmd_list[j];
if (pmd->core_id == NON_PMD_CORE_ID) {
continue;
}
if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
ds_put_format(reply, "%u,", pmd->core_id);
}
}
ds_chomp(reply, ',');
if (ds_last(reply) == ' ') {
ds_put_cstr(reply, "none");
}
ds_put_cstr(reply, ")\n");
}
}
/* Searches 'mfex_impls' for the entry whose name equals 'name' and stores its
 * extract function in '*out_func'.
 *
 * Returns:
 *   0        on success; '*out_func' holds the entry's function pointer.
 *   -EINVAL  if 'name' or 'out_func' is NULL.
 *   -ENODEV  if the entry exists but is not marked available; '*out_func' is
 *            set to NULL.
 *   -ENOENT  if no entry has that name; '*out_func' is left unmodified.
 */
int
dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func)
{
    if (name == NULL || out_func == NULL) {
        return -EINVAL;
    }

    /* Locate the first table entry carrying the requested name. */
    const struct dpif_miniflow_extract_impl *match = NULL;
    for (int i = 0; i < MFEX_IMPL_MAX && !match; i++) {
        if (!strcmp(mfex_impls[i].name, name)) {
            match = &mfex_impls[i];
        }
    }

    if (!match) {
        return -ENOENT;
    }

    /* Never hand out an implementation that is not marked available. */
    if (!match->available) {
        *out_func = NULL;
        return -ENODEV;
    }

    *out_func = match->extract_func;
    return 0;
}
/* Returns a pointer to the first element of the file-scope 'mfex_impls'
 * table.  The pointer is not const, so callers get direct access to the
 * table itself rather than to a copy. */
struct dpif_miniflow_extract_impl *
dpif_mfex_impl_info_get(void)
{
    return &mfex_impls[0];
}
/* MFEX autovalidator: checks every available optimized MFEX implementation
 * against the scalar miniflow_extract() on one batch of packets.
 *
 * 'packets'    - batch to parse.  Packet metadata is (re)initialized with
 *                'in_port', and on return the packet offsets hold the values
 *                produced by the scalar miniflow_extract().
 * 'keys'       - output array; keys[i].mf receives the scalar result for
 *                packet i.
 * 'keys_size'  - number of elements in 'keys'; must be at least the number
 *                of packets in the batch.
 * 'in_port'    - input port used to initialize packet metadata; also passed
 *                to each implementation under test.
 * 'pmd_handle' - PMD thread the batch belongs to.  If 'keys_size' is too
 *                small or any implementation disagrees with the scalar
 *                result, NULL is stored into its 'miniflow_extract_opt',
 *                which disables the autovalidator for that thread.
 *
 * For each implementation from MFEX_IMPL_START_IDX onwards that is marked
 * available, the packets it reports in its hit mask are compared against the
 * scalar result: the miniflow map bits, the miniflow contents, and the packet
 * offsets.  Mismatches are logged with VLOG_ERR.
 *
 * Always returns 0 (see the comment before the final return). */
uint32_t
dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
                                    struct netdev_flow_key *keys,
                                    uint32_t keys_size, odp_port_t in_port,
                                    struct dp_netdev_pmd_thread *pmd_handle)
{
    const size_t cnt = dp_packet_batch_size(packets);
    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
    struct dp_packet *packet;
    struct dp_netdev_pmd_thread *pmd = pmd_handle;
    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];

    if (keys_size < cnt) {
        miniflow_extract_func default_func = NULL;
        atomic_uintptr_t *pmd_func = (void *)&pmd->miniflow_extract_opt;

        atomic_store_relaxed(pmd_func, (uintptr_t) default_func);
        VLOG_ERR("Invalid key size supplied, Key_size: %"PRIu32" less than "
                 "batch_size: %" PRIuSIZE"\n", keys_size, cnt);
        VLOG_ERR("Autovalidator is disabled.\n");
        return 0;
    }

    /* 'test_keys' only has room for NETDEV_MAX_BURST entries, so never clear
     * or advertise more than that, whatever 'keys_size' the caller passed. */
    const uint32_t test_keys_size = keys_size < NETDEV_MAX_BURST
                                    ? keys_size : NETDEV_MAX_BURST;

    /* Run scalar miniflow_extract to get default result. */
    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
        pkt_metadata_init(&packet->md, in_port);
        miniflow_extract(packet, &keys[i].mf);

        /* Store known good metadata to compare with optimized metadata. */
        good_l2_5_ofs[i] = packet->l2_5_ofs;
        good_l3_ofs[i] = packet->l3_ofs;
        good_l4_ofs[i] = packet->l4_ofs;
        good_l2_pad_size[i] = packet->l2_pad_size;
    }

    uint32_t batch_failed = 0;

    /* Iterate through each version of miniflow implementations. */
    for (int j = MFEX_IMPL_START_IDX; j < MFEX_IMPL_MAX; j++) {
        if (!mfex_impls[j].available) {
            continue;
        }

        /* Reset keys and offsets before each implementation. */
        memset(test_keys, 0, test_keys_size * sizeof test_keys[0]);
        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
            dp_packet_reset_offsets(packet);
        }

        /* Call optimized miniflow for each batch of packet. */
        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
                                                       test_keys_size,
                                                       in_port, pmd_handle);

        /* Do a miniflow compare for bits, blocks and offsets for all the
         * classified packets in the hitmask marked by set bits. */
        while (hit_mask) {
            /* Index for the set bit. */
            uint32_t i = raw_ctz(hit_mask);

            /* Set the index in hitmask to Zero. */
            hit_mask &= (hit_mask - 1);

            /* A hit for a packet that is not in the batch is itself a
             * validation failure, and there is no reference data to compare
             * it against, so report it and move on. */
            if (i >= cnt) {
                VLOG_ERR("Autovalidation for %s failed: hit reported for pkt "
                         "%"PRIu32" but batch only has %"PRIuSIZE" packets.",
                         mfex_impls[j].name, i, cnt);
                batch_failed = 1;
                continue;
            }

            uint32_t failed = 0;
            struct ds log_msg = DS_EMPTY_INITIALIZER;

            ds_put_format(&log_msg, "MFEX autovalidator pkt %"PRIu32"\n", i);

            /* Check miniflow bits are equal. */
            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
                ds_put_format(&log_msg, "Autovalidation map failed\n"
                              "Good: 0x%llx 0x%llx Test: 0x%llx 0x%llx\n",
                              keys[i].mf.map.bits[0],
                              keys[i].mf.map.bits[1],
                              test_keys[i].mf.map.bits[0],
                              test_keys[i].mf.map.bits[1]);
                failed = 1;
            }

            /* Check the miniflow contents are equal; dump both on failure. */
            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
                uint32_t test_block_cnt = miniflow_n_values(&test_keys[i].mf);

                ds_put_format(&log_msg, "Autovalidation blocks failed\n"
                              "Good hex:\n");
                ds_put_hex_dump(&log_msg, &keys[i].buf, block_cnt * 8, 0,
                                false);
                ds_put_format(&log_msg, "Test hex:\n");
                ds_put_hex_dump(&log_msg, &test_keys[i].buf,
                                test_block_cnt * 8, 0, false);
                failed = 1;
            }

            /* Check the offsets the implementation left in the packet. */
            packet = packets->packets[i];
            if ((packet->l2_pad_size != good_l2_pad_size[i]) ||
                (packet->l2_5_ofs != good_l2_5_ofs[i]) ||
                (packet->l3_ofs != good_l3_ofs[i]) ||
                (packet->l4_ofs != good_l4_ofs[i])) {
                ds_put_format(&log_msg,
                              "Autovalidation packet offsets failed\n");
                ds_put_format(&log_msg, "Good offsets: "
                              "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
                              "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
                              good_l2_pad_size[i], good_l2_5_ofs[i],
                              good_l3_ofs[i], good_l4_ofs[i]);
                ds_put_format(&log_msg, "Test offsets: "
                              "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
                              "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
                              packet->l2_pad_size, packet->l2_5_ofs,
                              packet->l3_ofs, packet->l4_ofs);
                failed = 1;
            }

            if (failed) {
                VLOG_ERR("Autovalidation for %s failed in pkt %"PRIu32","
                         " disabling.", mfex_impls[j].name, i);
                VLOG_ERR("Autovalidation failure details:\n%s",
                         ds_cstr(&log_msg));
                batch_failed = 1;
            }
            ds_destroy(&log_msg);
        }
    }

    /* Having dumped the debug info for the batch, disable autovalidator. */
    if (batch_failed) {
        miniflow_extract_func default_func = NULL;
        atomic_uintptr_t *pmd_func = (void *)&pmd->miniflow_extract_opt;

        atomic_store_relaxed(pmd_func, (uintptr_t) default_func);
    }

    /* Preserve packet correctness by storing back the good offsets in
     * packets back. */
    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
        packet->l2_5_ofs = good_l2_5_ofs[i];
        packet->l3_ofs = good_l3_ofs[i];
        packet->l4_ofs = good_l4_ofs[i];
        packet->l2_pad_size = good_l2_pad_size[i];
    }

    /* Returning zero implies no packets were hit by autovalidation.  This
     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
     * would pass/fail.  By always returning zero, autovalidator is a little
     * slower, but we gain consistency in testing.  The auto-validator is only
     * meant to test different implementations against a batch of packets
     * without incrementing hit counters.
     */
    return 0;
}