ovs/lib/netdev-native-tnl.c

/*
 * Copyright (c) 2016 Nicira, Inc.
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "netdev-native-tnl.h"

#include <errno.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <sys/ioctl.h>

#include <stdlib.h>
#include <sys/time.h>

#include "byte-order.h"
#include "coverage.h"
#include "csum.h"
#include "dp-packet.h"
#include "netdev.h"
#include "netdev-vport.h"
#include "netdev-vport-private.h"
#include "odp-netlink.h"
#include "packets.h"
#include "seq.h"
#include "unaligned.h"
#include "unixctl.h"
#include "util.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(native_tnl);
static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);

COVERAGE_DEFINE(native_tnl_l3csum_checked);
COVERAGE_DEFINE(native_tnl_l3csum_err);
COVERAGE_DEFINE(native_tnl_l4csum_checked);
COVERAGE_DEFINE(native_tnl_l4csum_err);

/* Combined size of the UDP header and the fixed VXLAN header. */
#define VXLAN_HLEN   (sizeof(struct udp_header) +         \
                      sizeof(struct vxlanhdr))

/* Combined size of the UDP header and the base Geneve header. */
#define GENEVE_BASE_HLEN   (sizeof(struct udp_header) +         \
                            sizeof(struct genevehdr))

/* Combined size of the UDP header and the mandatory GTP-U header.  Note that
 * this length starts at the UDP header, not at the GTP-U header. */
#define GTPU_HLEN   (sizeof(struct udp_header) +        \
                     sizeof(struct gtpuhdr))

/* UDP source port range used by netdev_tnl_get_src_port(): the chosen port is
 * 'tnl_udp_port_min' plus a hash-derived offset that is scaled by
 * ('tnl_udp_port_max' - 'tnl_udp_port_min'). */
uint16_t tnl_udp_port_min = 32768;
uint16_t tnl_udp_port_max = 61000;

/* Returns, in network byte order, the UDP source port to use when
 * encapsulating 'packet'.
 *
 * The port is derived from the packet's RSS hash.  If 'packet' does not
 * carry a valid RSS hash, one is computed from the 5-tuple of its extracted
 * flow and stored back into 'packet'.  The 32-bit hash is then scaled to an
 * offset within ('tnl_udp_port_max' - 'tnl_udp_port_min') and added to
 * 'tnl_udp_port_min'. */
ovs_be16
netdev_tnl_get_src_port(struct dp_packet *packet)
{
    int port_span = tnl_udp_port_max - tnl_udp_port_min;
    uint32_t rss;
    uint32_t offset;

    if (OVS_UNLIKELY(!dp_packet_rss_valid(packet))) {
        struct flow flow;

        /* No usable hash yet: derive one and cache it in the packet. */
        flow_extract(packet, &flow);
        rss = flow_hash_5tuple(&flow, 0);
        dp_packet_set_rss_hash(packet, rss);
    } else {
        rss = dp_packet_get_rss_hash(packet);
    }

    /* Multiply-and-shift maps the hash onto the port span. */
    offset = ((uint64_t) rss * port_span) >> 32;

    return htons(offset + tnl_udp_port_min);
}

/* Parses the outer IP header of 'packet' and copies its tunnel-relevant
 * fields into 'tnl'.
 *
 * IPv4: records source/destination address, TOS and TTL.  The header
 * checksum is verified here only when the packet does not already carry a
 * checksum verdict.  Packets whose total length exceeds the bytes actually
 * present, or that carry IP options, are rejected.
 *
 * IPv6: records source/destination address, the bits of the flow word
 * shifted right by 20 (stored in 'ip_tos') and the hop limit.
 *
 * On success stores in '*hlen' the size of the Ethernet header plus the
 * outer IP header (for IPv6, the distance between the packet's L3 and L4
 * offsets) and returns a pointer to the L4 header.  Returns NULL if 'packet'
 * has no L3 or L4 pointer, if any of the checks above fails, or if the IP
 * version is neither 4 nor 6; '*hlen' is not meaningful in that case. */
static void *
ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
                  unsigned int *hlen)
{
    void *nh;
    struct ip_header *ip;
    struct ovs_16aligned_ip6_hdr *ip6;
    void *l4;
    int l3_size;

    nh = dp_packet_l3(packet);
    ip = nh;
    ip6 = nh;
    l4 = dp_packet_l4(packet);

    if (!nh || !l4) {
        return NULL;
    }

    *hlen = sizeof(struct eth_header);

    /* Number of bytes available from the start of the L3 header. */
    l3_size = dp_packet_size(packet) -
              ((char *)nh - (char *)dp_packet_data(packet));

    if (IP_VER(ip->ip_ihl_ver) == 4) {
        bool bad_csum = dp_packet_ip_checksum_bad(packet);
        ovs_be32 ip_src, ip_dst;

        /* A packet coming from a network device might have the
         * csum already checked. In this case, skip the check. */
        if (OVS_UNLIKELY(!bad_csum && dp_packet_ip_checksum_unknown(packet))) {
            COVERAGE_INC(native_tnl_l3csum_checked);
            if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
                dp_packet_ip_checksum_set_bad(packet);
                bad_csum = true;
            } else {
                dp_packet_ip_checksum_set_good(packet);
            }
        }
        if (OVS_UNLIKELY(bad_csum)) {
            COVERAGE_INC(native_tnl_l3csum_err);
            VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
            return NULL;
        }

        if (ntohs(ip->ip_tot_len) > l3_size) {
            VLOG_WARN_RL(&err_rl, "ip packet is truncated (IP length %d, actual %d)",
                         ntohs(ip->ip_tot_len), l3_size);
            return NULL;
        }
        if (IP_IHL(ip->ip_ihl_ver) * 4 > sizeof(struct ip_header)) {
            VLOG_WARN_RL(&err_rl, "ip options not supported on tunnel packets "
                         "(%d bytes)", IP_IHL(ip->ip_ihl_ver) * 4);
            return NULL;
        }

        ip_src = get_16aligned_be32(&ip->ip_src);
        ip_dst = get_16aligned_be32(&ip->ip_dst);

        tnl->ip_src = ip_src;
        tnl->ip_dst = ip_dst;
        tnl->ip_tos = ip->ip_tos;
        tnl->ip_ttl = ip->ip_ttl;

        *hlen += IP_HEADER_LEN;

    } else if (IP_VER(ip->ip_ihl_ver) == 6) {
        ovs_be32 tc_flow = get_16aligned_be32(&ip6->ip6_flow);

        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);

        tnl->ip_tos = ntohl(tc_flow) >> 20;
        tnl->ip_ttl = ip6->ip6_hlim;

        /* Covers the fixed IPv6 header and whatever lies between it and the
         * L4 offset recorded in the packet. */
        *hlen += packet->l4_ofs - packet->l3_ofs;

    } else {
        /* Neither IPv4 nor IPv6, so do not call the packet "ipv4" here. */
        VLOG_WARN_RL(&err_rl, "ip packet has invalid version (%d)",
                     IP_VER(ip->ip_ihl_ver));
        return NULL;
    }

    return l4;
}

/* Prepends the 'size' bytes of 'header' to 'packet', using the packet's
 * headroom (dp_packet_push_uninit() provides the space).  'header' should
 * contain an Ethernet header followed by either an IPv4 header (without
 * options) or an IPv6 header, and then the tunnel's L4 header.
 *
 * The length field of the pushed IP header is filled in here: 'ip_tot_len'
 * for IPv4 (everything after the Ethernet header) or 'ip6_plen' for IPv6
 * (everything after the IPv6 header).  '*ip_tot_size' receives the number of
 * bytes that follow the IP header.  For IPv6 the flow label is set from
 * 'ipv6_label'.
 *
 * The IPv4 header checksum is not computed here; the packet is marked as
 * needing it later.  For IPv6 the IP checksum state is set to unknown.  The
 * packet type becomes Ethernet and the l3/l4 offsets of 'packet' are updated
 * to describe the pushed headers.
 *
 * Returns a pointer to the L4 header added to 'packet'. */
void *
netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
                          int size, int *ip_tot_size, ovs_be32 ipv6_label)
{
    struct eth_header *eth = dp_packet_push_uninit(packet, size);
    /* Bytes following the Ethernet header once the new headers are in. */
    int l3_len = dp_packet_size(packet) - sizeof (struct eth_header);
    struct ovs_16aligned_ip6_hdr *ip6;

    memcpy(eth, header, size);

    /* The encapsulated packet has type Ethernet. Adjust dp_packet. */
    packet->packet_type = htonl(PT_ETH);
    dp_packet_reset_offsets(packet);
    packet->l3_ofs = sizeof (struct eth_header);

    if (!netdev_tnl_is_header_ipv6(header)) {
        struct ip_header *ip = netdev_tnl_ip_hdr(eth);

        ip->ip_tot_len = htons(l3_len);
        /* Postpone checksum to when the packet is pushed to the port. */
        dp_packet_ip_checksum_set_partial(packet);

        *ip_tot_size = l3_len - IP_HEADER_LEN;
        packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;

        return ip + 1;
    }

    ip6 = netdev_tnl_ipv6_hdr(eth);
    *ip_tot_size = l3_len - IPV6_HEADER_LEN;
    ip6->ip6_plen = htons(*ip_tot_size);
    packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
    dp_packet_ip_checksum_set_unknown(packet);

    packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;

    return ip6 + 1;
}

/* Extracts outer IP and UDP tunnel metadata from 'packet' into 'tnl'.
 *
 * The IP part is handled by ip_extract_tnl_md(), which also sets '*hlen'
 * (Ethernet + IP header length; the UDP header is NOT included).  If the UDP
 * checksum field is nonzero, the checksum is verified here unless the packet
 * already carries an L4 checksum verdict, and FLOW_TNL_F_CSUM is set in
 * 'tnl->flags' when it is good.  The UDP source and destination ports are
 * copied into 'tnl'.
 *
 * Returns a pointer to the first byte after the UDP header, or NULL if the
 * IP header was rejected or the UDP checksum is bad. */
static void *
udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
                   unsigned int *hlen)
{
    struct udp_header *udp;

    udp = ip_extract_tnl_md(packet, tnl, hlen);
    if (!udp) {
        return NULL;
    }

    /* A zero checksum field skips verification entirely. */
    if (udp->udp_csum) {
        bool bad_csum = dp_packet_l4_checksum_bad(packet);

        /* Only compute the checksum in software when no verdict exists. */
        if (OVS_UNLIKELY(!bad_csum && dp_packet_l4_checksum_unknown(packet))) {
            uint32_t csum;
            COVERAGE_INC(native_tnl_l4csum_checked);
            if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
                csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
            } else {
                csum = packet_csum_pseudoheader(dp_packet_l3(packet));
            }

            /* Sum from the UDP header through the end of the packet. */
            csum = csum_continue(csum, udp, dp_packet_size(packet) -
                                 ((const unsigned char *)udp -
                                  (const unsigned char *)dp_packet_eth(packet)
                                 ));
            if (csum_finish(csum)) {
                dp_packet_l4_checksum_set_bad(packet);
                bad_csum = true;
            } else {
                dp_packet_l4_checksum_set_good(packet);
            }
        }
        if (OVS_UNLIKELY(bad_csum)) {
            COVERAGE_INC(native_tnl_l4csum_err);
            return NULL;
        }
        tnl->flags |= FLOW_TNL_F_CSUM;
    }

    tnl->tp_src = udp->udp_src;
    tnl->tp_dst = udp->udp_dst;

    return udp + 1;
}

/* Prepares the offload flags of 'packet' for encapsulation.
 *
 * The existing flags are shifted left by DP_PACKET_OL_SHIFT_COUNT
 * (presumably moving them into the inner-packet position; see dp-packet.h),
 * then the tunnel kind is recorded for Geneve, VXLAN and GRE (IPv4 or IPv6)
 * according to 'data->tnl_type'.  Other tunnel types only get the shift. */
static void
tnl_ol_push(struct dp_packet *packet,
            const struct ovs_action_push_tnl *data)
{
    packet->offloads <<= DP_PACKET_OL_SHIFT_COUNT;

    switch (data->tnl_type) {
    case OVS_VPORT_TYPE_GENEVE:
        dp_packet_tunnel_set_geneve(packet);
        break;

    case OVS_VPORT_TYPE_VXLAN:
        dp_packet_tunnel_set_vxlan(packet);
        break;

    case OVS_VPORT_TYPE_GRE:
    case OVS_VPORT_TYPE_IP6GRE:
        dp_packet_tunnel_set_gre(packet);
        break;

    default:
        /* No tunnel marker for the remaining tunnel types. */
        break;
    }
}

/* Counterpart of tnl_ol_push() for decapsulation: shifts the offload flags of
 * 'packet' right by DP_PACKET_OL_SHIFT_COUNT and then calls
 * dp_packet_reset_packet() with 'off'.  Callers in this file pass the total
 * length of the outer headers being removed as 'off'. */
static void
tnl_ol_pop(struct dp_packet *packet, int off)
{
    packet->offloads >>= DP_PACKET_OL_SHIFT_COUNT;

    dp_packet_reset_packet(packet, off);
}

/* Encapsulates 'packet' with the prebuilt Ethernet/IP/UDP tunnel header in
 * 'data'.
 *
 * The UDP source port is derived from the packet's hash, the UDP length is
 * set to the number of bytes following the IP header, and the L4 checksum is
 * marked partial when the template carries a nonzero checksum and good
 * otherwise.  If 'packet' had L3/L4 offsets before encapsulation, they are
 * preserved as inner offsets, shifted by the pushed header length.
 *
 * 'netdev' is unused. */
void
netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                           struct dp_packet *packet,
                           const struct ovs_action_push_tnl *data)
{
    /* Snapshot the payload's offsets before anything can change them. */
    const uint16_t inner_l3 = packet->l3_ofs;
    const uint16_t inner_l4 = packet->l4_ofs;
    struct udp_header *udp;
    ovs_be16 src_port;
    int l4_len;

    /* We may need to re-calculate the hash and this has to be done before
     * modifying the packet. */
    src_port = netdev_tnl_get_src_port(packet);

    tnl_ol_push(packet, data);
    udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
                                    &l4_len, 0);

    udp->udp_len = htons(l4_len);
    udp->udp_src = src_port;

    dp_packet_l4_proto_set_udp(packet);
    if (!udp->udp_csum) {
        dp_packet_l4_checksum_set_good(packet);
    } else {
        dp_packet_l4_checksum_set_partial(packet);
    }

    if (inner_l3 != UINT16_MAX) {
        packet->inner_l3_ofs = inner_l3 + data->header_len;
    }
    if (inner_l4 != UINT16_MAX) {
        packet->inner_l4_ofs = inner_l4 + data->header_len;
    }
}

/* Starts a fresh tunnel header template in 'data': zeroes the whole header
 * buffer and writes an Ethernet header using the MAC addresses in 'params',
 * with an IPv6 or IPv4 ethertype according to 'params->is_ipv6'.
 *
 * Sets 'data->header_len' to the Ethernet header size and returns a pointer
 * to the first byte after the Ethernet header. */
static void *
eth_build_header(struct ovs_action_push_tnl *data,
                 const struct netdev_tnl_build_header_params *params)
{
    struct eth_header *eth = (struct eth_header *) data->header;

    memset(data->header, 0, sizeof data->header);

    eth->eth_src = params->smac;
    eth->eth_dst = params->dmac;
    eth->eth_type = htons(params->is_ipv6 ? ETH_TYPE_IPV6 : ETH_TYPE_IP);

    data->header_len = sizeof *eth;

    return eth + 1;
}

/* Builds the Ethernet and IP part of a tunnel header template in 'data'.
 *
 * The Ethernet header comes from eth_build_header().  Depending on
 * 'params->is_ipv6', an IPv6 or an option-less IPv4 header follows, filled
 * from the tunnel fields of 'params->flow' (TOS, TTL/hop limit, destination)
 * and the source address 'params->s_ip'.  'next_proto' goes into the
 * protocol / next-header field.  'ipv6_label' (masked to the flow-label
 * bits) is used only for IPv6.  For IPv4, the DF bit is set when the flow
 * requests FLOW_TNL_F_DONT_FRAGMENT; length and checksum fields are left
 * zero for the push-time code to complete.
 *
 * Adds the IP header size to 'data->header_len' and returns a pointer to the
 * first byte after the IP header. */
void *
netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
                           const struct netdev_tnl_build_header_params *params,
                           uint8_t next_proto, ovs_be32 ipv6_label)
{
    void *l3 = eth_build_header(data, params);

    if (params->is_ipv6) {
        struct ovs_16aligned_ip6_hdr *ip6 = l3;
        ovs_be32 flow_word;

        /* Version 6, traffic class from the tunnel TOS, then flow label. */
        flow_word = htonl(6 << 28) |
                    htonl(params->flow->tunnel.ip_tos << 20) |
                    (ipv6_label & htonl(IPV6_LABEL_MASK));
        put_16aligned_be32(&ip6->ip6_flow, flow_word);

        ip6->ip6_nxt = next_proto;
        ip6->ip6_hlim = params->flow->tunnel.ip_ttl;
        memcpy(&ip6->ip6_src, params->s_ip, sizeof(ovs_be32[4]));
        memcpy(&ip6->ip6_dst, &params->flow->tunnel.ipv6_dst,
               sizeof(ovs_be32[4]));

        data->header_len += IPV6_HEADER_LEN;
        return ip6 + 1;
    } else {
        struct ip_header *ip = l3;
        ovs_be32 ip_src = in6_addr_get_mapped_ipv4(params->s_ip);
        bool dont_frag = (params->flow->tunnel.flags
                          & FLOW_TNL_F_DONT_FRAGMENT) != 0;

        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
        ip->ip_tos = params->flow->tunnel.ip_tos;
        ip->ip_frag_off = dont_frag ? htons(IP_DF) : 0;
        ip->ip_ttl = params->flow->tunnel.ip_ttl;
        ip->ip_proto = next_proto;
        put_16aligned_be32(&ip->ip_src, ip_src);
        put_16aligned_be32(&ip->ip_dst, params->flow->tunnel.ip_dst);

        /* The checksum will be calculated when the headers are pushed
         * to the packet if offloading is not enabled. */

        data->header_len += IP_HEADER_LEN;
        return ip + 1;
    }
}

/* Builds the Ethernet, IP and UDP part of a tunnel header template in
 * 'data'.
 *
 * The UDP destination port is taken from 'tnl_cfg'.  The source port and
 * length are left for push time.  When the flow requests FLOW_TNL_F_CSUM,
 * the checksum field is pre-set to a nonzero marker so that the push code
 * knows a checksum must be produced.
 *
 * Adds the UDP header size to 'data->header_len' and returns a pointer to
 * the first byte after the UDP header. */
static void *
udp_build_header(const struct netdev_tunnel_config *tnl_cfg,
                 struct ovs_action_push_tnl *data,
                 const struct netdev_tnl_build_header_params *params)
{
    struct udp_header *udp = netdev_tnl_ip_build_header(data, params,
                                                        IPPROTO_UDP, 0);
    bool want_csum = (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) != 0;

    udp->udp_dst = tnl_cfg->dst_port;
    if (want_csum) {
        /* Write a value in now to mark that we should compute the checksum
         * later. 0xffff is handy because it is transparent to the
         * calculation. */
        udp->udp_csum = htons(0xffff);
    }

    data->header_len += sizeof(struct udp_header);

    return udp + 1;
}

/* Returns the length in bytes of a GRE header whose flags field is 'flags'
 * (network byte order): 4 bytes for the base header plus 4 bytes for each
 * of the checksum, key and sequence-number options that is present. */
static int
gre_header_len(ovs_be16 flags)
{
    int n_opts = 0;

    n_opts += (flags & htons(GRE_CSUM)) ? 1 : 0;
    n_opts += (flags & htons(GRE_KEY)) ? 1 : 0;
    n_opts += (flags & htons(GRE_SEQ)) ? 1 : 0;

    return 4 + 4 * n_opts;
}

/* Parses the outer IP and GRE headers of 'packet', filling in 'tnl'.
 *
 * Only the checksum, key and sequence-number GRE flags are accepted.  When a
 * checksum is present it is verified over everything from the GRE header to
 * the end of the packet and FLOW_TNL_F_CSUM is set.  When a key is present
 * it becomes the tunnel ID and FLOW_TNL_F_KEY is set.  A sequence number is
 * skipped over without being recorded.
 *
 * The packet type of 'packet' is set from the GRE protocol field:
 * Ethernet for ETH_TYPE_TEB, otherwise the protocol value is used as an
 * Ethertype if it is at least ETH_TYPE_MIN.
 *
 * Returns the total length of the Ethernet, IP and GRE headers on success,
 * or -EINVAL if the packet is malformed or uses unsupported GRE features. */
static int
parse_gre_header(struct dp_packet *packet,
                 struct flow_tnl *tnl)
{
    const struct gre_base_hdr *greh;
    ovs_16aligned_be32 *options;
    int hlen;
    unsigned int ulen;
    uint16_t greh_protocol;

    greh = ip_extract_tnl_md(packet, tnl, &ulen);
    if (!greh) {
        return -EINVAL;
    }

    if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
        return -EINVAL;
    }

    /* Make sure every option announced by the flags is actually present. */
    hlen = ulen + gre_header_len(greh->flags);
    if (hlen > dp_packet_size(packet)) {
        return -EINVAL;
    }

    options = (ovs_16aligned_be32 *)(greh + 1);
    if (greh->flags & htons(GRE_CSUM)) {
        ovs_be16 pkt_csum;

        pkt_csum = csum(greh, dp_packet_size(packet) -
                              ((const unsigned char *)greh -
                               (const unsigned char *)dp_packet_eth(packet)));
        if (pkt_csum) {
            return -EINVAL;
        }
        /* OR the flag in, like the other extractors in this file, so that
         * flags set earlier are never discarded. */
        tnl->flags |= FLOW_TNL_F_CSUM;
        options++;
    }

    if (greh->flags & htons(GRE_KEY)) {
        tnl->tun_id = be32_to_be64(get_16aligned_be32(options));
        tnl->flags |= FLOW_TNL_F_KEY;
        options++;
    }

    if (greh->flags & htons(GRE_SEQ)) {
        options++;
    }

    /* Set the new packet type depending on the GRE protocol field. */
    greh_protocol = ntohs(greh->protocol);
    if (greh_protocol == ETH_TYPE_TEB) {
        packet->packet_type = htonl(PT_ETH);
    } else if (greh_protocol >= ETH_TYPE_MIN) {
        /* Allow all GRE protocol values above 0x5ff as Ethertypes. */
        packet->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, greh_protocol);
    } else {
        return -EINVAL;
    }

    return hlen;
}

/* Decapsulates a GRE packet.
 *
 * Initializes the tunnel metadata of 'packet', parses the outer IP and GRE
 * headers into it and strips them.  Returns 'packet' on success.  If the
 * packet is too short to hold Ethernet, IP and the 4-byte base GRE header,
 * or if parsing fails, 'packet' is deleted and NULL is returned. */
struct dp_packet *
netdev_gre_pop_header(struct dp_packet *packet)
{
    const void *frame = dp_packet_data(packet);
    struct pkt_metadata *md = &packet->md;
    int min_len;
    int hlen;

    ovs_assert(frame);

    /* Ethernet + outer IP + base GRE header (4 bytes). */
    min_len = sizeof(struct eth_header) + 4;
    if (netdev_tnl_is_header_ipv6(frame)) {
        min_len += IPV6_HEADER_LEN;
    } else {
        min_len += IP_HEADER_LEN;
    }

    pkt_metadata_init_tnl(md);

    if (min_len <= dp_packet_size(packet)) {
        hlen = parse_gre_header(packet, &md->tunnel);
        if (hlen >= 0) {
            tnl_ol_pop(packet, hlen);
            return packet;
        }
    }

    dp_packet_delete(packet);
    return NULL;
}

/* Encapsulates 'packet' with the prebuilt Ethernet/IP/GRE header in 'data'.
 *
 * If the template requests a GRE sequence number, the next value of the
 * per-netdev counter is written into the last 4 bytes of the GRE header
 * (skipped, with a warning, for TSO packets).  If the template requests a
 * GRE checksum, it is computed over the GRE header and payload.
 *
 * The sequence number is written BEFORE the checksum is computed: the
 * checksum covers the whole GRE header, so computing it first would yield a
 * checksum that no longer matches once the sequence number is stored.
 *
 * If 'packet' had L3/L4 offsets before encapsulation, they are preserved as
 * inner offsets, shifted by the pushed header length. */
void
netdev_gre_push_header(const struct netdev *netdev,
                       struct dp_packet *packet,
                       const struct ovs_action_push_tnl *data)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);
    uint16_t l3_ofs = packet->l3_ofs;
    uint16_t l4_ofs = packet->l4_ofs;
    struct gre_base_hdr *greh;
    int ip_tot_size;

    tnl_ol_push(packet, data);

    greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
                                     &ip_tot_size, 0);

    if (greh->flags & htons(GRE_SEQ)) {
        if (!dp_packet_get_tso_segsz(packet)) {
            /* Last 4 bytes are GRE seqno. */
            int seq_ofs = gre_header_len(greh->flags) - 4;
            ovs_16aligned_be32 *seq_opt =
                ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs);

            put_16aligned_be32(seq_opt,
                               htonl(atomic_count_inc(&dev->gre_seqno)));
        } else {
            VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO.");
        }
    }

    /* Must come after every other GRE header field has its final value.
     * The checksum field itself is zero in the template, as required for
     * the computation. */
    if (greh->flags & htons(GRE_CSUM)) {
        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
        *csum_opt = csum(greh, ip_tot_size);
    }

    if (l3_ofs != UINT16_MAX) {
        packet->inner_l3_ofs = l3_ofs + data->header_len;
    }
    if (l4_ofs != UINT16_MAX) {
        packet->inner_l4_ofs = l4_ofs + data->header_len;
    }
}

int
netdev_gre_build_header(const struct netdev *netdev,
                        struct ovs_action_push_tnl *data,
                        const struct netdev_tnl_build_header_params *params)
{
    const struct netdev_tunnel_config *tnl_cfg;
    struct gre_base_hdr *greh;
    ovs_16aligned_be32 *options;
    unsigned int hlen;

    greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE, 0);

    if (params->flow->packet_type == htonl(PT_ETH)) {
        greh->protocol = htons(ETH_TYPE_TEB);
    } else if (pt_ns(params->flow->packet_type) == OFPHTN_ETHERTYPE) {
        greh->protocol = pt_ns_type_be(params->flow->packet_type);
    } else {
        return EINVAL;
    }
    greh->flags = 0;

    options = (ovs_16aligned_be32 *) (greh + 1);
    if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
        greh->flags |= htons(GRE_CSUM);
        put_16aligned_be32(options, 0);
        options++;
    }

    tnl_cfg = netdev_get_tunnel_config(netdev);

    if (tnl_cfg->out_key_present) {
        greh->flags |= htons(GRE_KEY);
        put_16aligned_be32(options, be64_to_be32(params->flow->tunnel.tun_id));
        options++;
    }

    if (tnl_cfg->set_seq) {
        greh->flags |= htons(GRE_SEQ);
        /* seqno is updated at push header */
        options++;
    }

    hlen = (uint8_t *) options - (uint8_t *) greh;

    data->header_len += hlen;
    if (!params->is_ipv6) {
        data->tnl_type = OVS_VPORT_TYPE_GRE;
    } else {
        data->tnl_type = OVS_VPORT_TYPE_IP6GRE;
    }
    return 0;
}

struct dp_packet *
netdev_erspan_pop_header(struct dp_packet *packet)
{
    const struct gre_base_hdr *greh;
    const struct erspan_base_hdr *ersh;
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    int hlen = sizeof(struct eth_header);
    unsigned int ulen;
    uint16_t greh_protocol;

    hlen += netdev_tnl_is_header_ipv6(dp_packet_data(packet)) ?
            IPV6_HEADER_LEN : IP_HEADER_LEN;

    pkt_metadata_init_tnl(md);
    if (hlen > dp_packet_size(packet)) {
        goto err;
    }

    greh = ip_extract_tnl_md(packet, tnl, &ulen);
    if (!greh) {
        goto err;
    }

    greh_protocol = ntohs(greh->protocol);
    if (greh_protocol != ETH_TYPE_ERSPAN1 &&
        greh_protocol != ETH_TYPE_ERSPAN2) {
        goto err;
    }

    if (greh->flags & ~htons(GRE_SEQ)) {
        goto err;
    }

    ersh = ERSPAN_HDR(greh);
    tnl->tun_id = be16_to_be64(htons(get_sid(ersh)));
    tnl->erspan_ver = ersh->ver;

    if (ersh->ver == 1) {
        ovs_16aligned_be32 *index = ALIGNED_CAST(ovs_16aligned_be32 *,
                                                 ersh + 1);
        tnl->erspan_idx = ntohl(get_16aligned_be32(index));
        tnl->flags |= FLOW_TNL_F_KEY;
        hlen = ulen + ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V1_MDSIZE;
    } else if (ersh->ver == 2) {
        struct erspan_md2 *md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
        tnl->erspan_dir = md2->dir;
        tnl->erspan_hwid = get_hwid(md2);
        tnl->flags |= FLOW_TNL_F_KEY;
        hlen = ulen + ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE;
    } else {
        VLOG_WARN_RL(&err_rl, "ERSPAN version error %d", ersh->ver);
        goto err;
    }

    if (hlen > dp_packet_size(packet)) {
        goto err;
    }

    tnl_ol_pop(packet, hlen);

    return packet;
err:
    dp_packet_delete(packet);
    return NULL;
}

/* Encapsulates 'packet' with the prebuilt Ethernet/IP/GRE/ERSPAN header in
 * 'data'.
 *
 * The 4 bytes following the base GRE header receive the next value of the
 * per-netdev sequence counter.  For ERSPAN version 2 (identified by the GRE
 * protocol field) the timestamp in the version 2 metadata is refreshed as
 * well. */
void
netdev_erspan_push_header(const struct netdev *netdev,
                          struct dp_packet *packet,
                          const struct ovs_action_push_tnl *data)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);
    struct erspan_base_hdr *ersh;
    ovs_16aligned_be32 *seq_field;
    struct gre_base_hdr *greh;
    struct erspan_md2 *md2;
    int ip_tot_size;

    greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
                                     &ip_tot_size, 0);

    /* update GRE seqno */
    seq_field = (ovs_16aligned_be32 *) (greh + 1);
    put_16aligned_be32(seq_field, htonl(atomic_count_inc(&dev->gre_seqno)));

    if (greh->protocol != htons(ETH_TYPE_ERSPAN2)) {
        return;
    }

    /* update v2 timestamp */
    ersh = ERSPAN_HDR(greh);
    md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
    put_16aligned_be32(&md2->timestamp, get_erspan_ts(ERSPAN_100US));
}

int
netdev_erspan_build_header(const struct netdev *netdev,
                           struct ovs_action_push_tnl *data,
                           const struct netdev_tnl_build_header_params *params)
{
    const struct netdev_tunnel_config *tnl_cfg;
    struct gre_base_hdr *greh;
    struct erspan_base_hdr *ersh;
    unsigned int hlen;
    uint32_t tun_id;
    int erspan_ver;
    uint16_t sid;

    greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE, 0);
    ersh = ERSPAN_HDR(greh);

    tun_id = ntohl(be64_to_be32(params->flow->tunnel.tun_id));
    /* ERSPAN only has 10-bit session ID */
    if (tun_id & ~ERSPAN_SID_MASK) {
        return EINVAL;
    } else {
        sid = (uint16_t) tun_id;
    }

    tnl_cfg = netdev_get_tunnel_config(netdev);

    if (tnl_cfg->erspan_ver_flow) {
        erspan_ver = params->flow->tunnel.erspan_ver;
    } else {
        erspan_ver = tnl_cfg->erspan_ver;
    }

    if (erspan_ver == 1) {
        greh->protocol = htons(ETH_TYPE_ERSPAN1);
        greh->flags = htons(GRE_SEQ);
        ersh->ver = 1;
        set_sid(ersh, sid);

        uint32_t erspan_idx = (tnl_cfg->erspan_idx_flow
                          ? params->flow->tunnel.erspan_idx
                          : tnl_cfg->erspan_idx);
        put_16aligned_be32(ALIGNED_CAST(ovs_16aligned_be32 *, ersh + 1),
                           htonl(erspan_idx));

        hlen = ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V1_MDSIZE;
    } else if (erspan_ver == 2) {
        struct erspan_md2 *md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);

        greh->protocol = htons(ETH_TYPE_ERSPAN2);
        greh->flags = htons(GRE_SEQ);
        ersh->ver = 2;
        set_sid(ersh, sid);

        md2->sgt = 0; /* security group tag */
        md2->gra = 0;
        put_16aligned_be32(&md2->timestamp, 0);

        if (tnl_cfg->erspan_hwid_flow) {
            set_hwid(md2, params->flow->tunnel.erspan_hwid);
        } else {
            set_hwid(md2, tnl_cfg->erspan_hwid);
        }

        if (tnl_cfg->erspan_dir_flow) {
            md2->dir = params->flow->tunnel.erspan_dir;
        } else {
            md2->dir = tnl_cfg->erspan_dir;
        }

        hlen = ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE;
    } else {
        VLOG_WARN_RL(&err_rl, "ERSPAN version error %d", tnl_cfg->erspan_ver);
        return EINVAL;
    }

    data->header_len += hlen;

    if (params->is_ipv6) {
        data->tnl_type = OVS_VPORT_TYPE_IP6ERSPAN;
    } else {
        data->tnl_type = OVS_VPORT_TYPE_ERSPAN;
    }
    return 0;
}

/* Decapsulates a GTP-U packet.
 *
 * Initializes the tunnel metadata of 'packet', parses the outer IP and UDP
 * headers and records the GTP-U flags, message type and TEID (as tunnel ID).
 *
 * For G-PDU messages the outer headers, including the optional 4-byte
 * sequence/N-PDU/next-extension block when the S flag is set, are stripped
 * and the packet type is set to IPv4 or IPv6 according to the version nibble
 * of the inner packet.  Other message types are left encapsulated, with the
 * packet type set to Ethernet.
 *
 * Returns 'packet' on success.  If the packet is too short or the outer
 * headers are rejected, 'packet' is deleted and NULL is returned. */
struct dp_packet *
netdev_gtpu_pop_header(struct dp_packet *packet)
{
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    struct gtpuhdr *gtph;
    unsigned int gtpu_hlen;
    unsigned int hlen;

    ovs_assert(packet->l3_ofs > 0);
    ovs_assert(packet->l4_ofs > 0);

    pkt_metadata_init_tnl(md);
    if (GTPU_HLEN > dp_packet_l4_size(packet)) {
        goto err;
    }

    gtph = udp_extract_tnl_md(packet, tnl, &hlen);
    if (!gtph) {
        goto err;
    }

    tnl->gtpu_flags = gtph->md.flags;
    tnl->gtpu_msgtype = gtph->md.msgtype;
    tnl->tun_id = be32_to_be64(get_16aligned_be32(&gtph->teid));

    if (tnl->gtpu_msgtype == GTPU_MSGTYPE_GPDU) {
        size_t l4_size = dp_packet_l4_size(packet);
        uint8_t inner_ver = 0;

        /* 'gtpu_hlen' is measured from the start of the UDP header. */
        gtpu_hlen = GTPU_HLEN;
        if (gtph->md.flags & GTPU_S_MASK) {
            gtpu_hlen += sizeof(struct gtpuhdr_opt);
        }

        /* The optional fields announced by the S flag must be present,
         * otherwise more bytes would be popped than the packet holds. */
        if (gtpu_hlen > l4_size) {
            goto err;
        }

        if (l4_size > gtpu_hlen) {
            /* 'gtph' already points past the UDP header, whereas
             * 'gtpu_hlen' includes it, so take the UDP header back out to
             * land on the first byte of the inner packet. */
            struct ip_header *ip =
                ALIGNED_CAST(struct ip_header *,
                             (char *) gtph + gtpu_hlen
                             - sizeof(struct udp_header));

            inner_ver = IP_VER(ip->ip_ihl_ver);
        }

        if (inner_ver == 4) {
            packet->packet_type = htonl(PT_IPV4);
        } else if (inner_ver == 6) {
            packet->packet_type = htonl(PT_IPV6);
        } else {
            VLOG_WARN_RL(&err_rl, "GTP-U: Receive non-IP packet.");
        }
        tnl_ol_pop(packet, hlen + gtpu_hlen);
    } else {
        /* non-GPDU GTP-U messages, ex: echo request, end marker.
         * Users should redirect these packets to controller, or
         * any application that handles GTP-U messages, so keep
         * the original packet.
         */
        packet->packet_type = htonl(PT_ETH);
        VLOG_WARN_ONCE("Receive non-GPDU msgtype: %"PRIu8,
                       gtph->md.msgtype);
    }

    return packet;

err:
    dp_packet_delete(packet);
    return NULL;
}

/* Encapsulates 'packet' with the prebuilt Ethernet/IP/UDP/GTP-U header in
 * 'data'.
 *
 * The UDP source port is derived from the packet's hash and the UDP checksum
 * is left to the egress netdev.  The GTP-U length field is set to the size
 * of the original payload, plus the size of the optional field block when
 * the S flag is set in the template; in that case the sequence number is
 * also filled in from the per-netdev counter. */
void
netdev_gtpu_push_header(const struct netdev *netdev,
                        struct dp_packet *packet,
                        const struct ovs_action_push_tnl *data)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);
    struct gtpuhdr *gtpuh;
    struct udp_header *udp;
    unsigned int gtp_len;
    ovs_be16 src_port;
    int l4_len;

    /* We may need to re-calculate the hash and this has to be done before
     * modifying the packet. */
    src_port = netdev_tnl_get_src_port(packet);

    /* Payload size must be sampled before the headers are prepended. */
    gtp_len = dp_packet_size(packet);

    udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
                                    &l4_len, 0);
    udp->udp_len = htons(l4_len);
    udp->udp_src = src_port;

    /* Postpone checksum to the egress netdev. */
    dp_packet_l4_proto_set_udp(packet);
    dp_packet_l4_checksum_set_partial(packet);

    gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
    if (gtpuh->md.flags & GTPU_S_MASK) {
        ovs_be16 *seq_field = ALIGNED_CAST(ovs_be16 *, gtpuh + 1);

        *seq_field = htons(atomic_count_inc(&dev->gre_seqno));
        gtp_len += sizeof(struct gtpuhdr_opt);
    }

    gtpuh->len = htons(gtp_len);
}

/* Builds the Ethernet/IP/UDP/GTP-U header template for 'netdev' in 'data'.
 *
 * The GTP-U flags and message type come from the flow's tunnel metadata,
 * falling back to GTPU_FLAGS_DEFAULT and GTPU_MSGTYPE_GPDU when they are
 * zero.  The TEID is the flow's tunnel ID.  When the tunnel configuration
 * enables sequence numbers, the S flag is set and room for the optional
 * field block is reserved in the header.
 *
 * Always returns 0. */
int
netdev_gtpu_build_header(const struct netdev *netdev,
                         struct ovs_action_push_tnl *data,
                         const struct netdev_tnl_build_header_params *params)
{
    const struct netdev_tunnel_config *tnl_cfg
        = netdev_get_tunnel_config(netdev);
    struct gtpuhdr *gtph = udp_build_header(tnl_cfg, data, params);
    unsigned int gtpu_len = sizeof *gtph;

    /* Set to default if not set in flow. */
    if (params->flow->tunnel.gtpu_flags) {
        gtph->md.flags = params->flow->tunnel.gtpu_flags;
    } else {
        gtph->md.flags = GTPU_FLAGS_DEFAULT;
    }
    if (params->flow->tunnel.gtpu_msgtype) {
        gtph->md.msgtype = params->flow->tunnel.gtpu_msgtype;
    } else {
        gtph->md.msgtype = GTPU_MSGTYPE_GPDU;
    }

    put_16aligned_be32(&gtph->teid,
                       be64_to_be32(params->flow->tunnel.tun_id));

    if (tnl_cfg->set_seq) {
        gtph->md.flags |= GTPU_S_MASK;
        gtpu_len += sizeof(struct gtpuhdr_opt);
    }

    data->tnl_type = OVS_VPORT_TYPE_GTPU;
    data->header_len += gtpu_len;

    return 0;
}

int
netdev_srv6_build_header(const struct netdev *netdev,
                         struct ovs_action_push_tnl *data,
                         const struct netdev_tnl_build_header_params *params)
{
    const struct netdev_tunnel_config *tnl_cfg;
    union ovs_16aligned_in6_addr *s;
    const struct in6_addr *segs;
    struct srv6_base_hdr *srh;
    ovs_be16 dl_type;
    int nr_segs;
    int i;

    tnl_cfg = netdev_get_tunnel_config(netdev);
    if (tnl_cfg->srv6_num_segs) {
        nr_segs = tnl_cfg->srv6_num_segs;
        segs = tnl_cfg->srv6_segs;
    } else {
        /*
         * If explicit segment list setting is omitted, tunnel destination
         * is considered to be the first segment list.
         */
        nr_segs = 1;
        segs = &params->flow->tunnel.ipv6_dst;
    }

    if (!ipv6_addr_equals(&segs[0], &params->flow->tunnel.ipv6_dst)) {
        return EINVAL;
    }

    /* Writes the netdev_srv6_flowlabel enum value to the ipv6
     * flowlabel field. It must later be replaced by a valid value
     * in the header push. */
    srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING,
                                     htonl(tnl_cfg->srv6_flowlabel));

    srh->rt_hdr.segments_left = nr_segs - 1;
    srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
    srh->rt_hdr.hdrlen = 2 * nr_segs;
    srh->last_entry = nr_segs - 1;
    srh->flags = 0;
    srh->tag = 0;

    dl_type = params->flow->dl_type;
    if (dl_type == htons(ETH_TYPE_IP)) {
        srh->rt_hdr.nexthdr = IPPROTO_IPIP;
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
        srh->rt_hdr.nexthdr = IPPROTO_IPV6;
    } else {
        return EOPNOTSUPP;
    }

    s = (union ovs_16aligned_in6_addr *) (srh + 1);
    for (i = 0; i < nr_segs; i++) {
        /* Segment list is written to the header in reverse order. */
        memcpy(s, &segs[nr_segs - i - 1], sizeof *s);
        s++;
    }

    data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
    data->tnl_type = OVS_VPORT_TYPE_SRV6;

    return 0;
}

/* Encapsulates 'packet' with the prebuilt Ethernet/IPv6/SRH header in
 * 'data'.
 *
 * The flow-label field of the template holds the configured flow-label mode
 * rather than a real label.  The real label is chosen here:
 *   - SRV6_FLOWLABEL_COPY: the inner packet's flow label if its version
 *     nibble is 6, otherwise zero.
 *   - SRV6_FLOWLABEL_COMPUTE: the low bits of the packet's RSS hash.
 *   - SRV6_FLOWLABEL_ZERO, or anything else: zero.
 *
 * 'netdev' is unused. */
void
netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
                        struct dp_packet *packet,
                        const struct ovs_action_push_tnl *data)
{
    /* Inner header must be located before the outer ones are prepended. */
    struct ovs_16aligned_ip6_hdr *inner_ip6 = dp_packet_l3(packet);
    struct ovs_16aligned_ip6_hdr *outer_ip6
        = netdev_tnl_ipv6_hdr((void *) data->header);
    enum netdev_srv6_flowlabel mode;
    ovs_be32 label = 0;
    int ip_tot_size;

    mode = ntohl(get_16aligned_be32(&outer_ip6->ip6_flow)) & IPV6_LABEL_MASK;

    if (mode == SRV6_FLOWLABEL_COPY) {
        uint32_t inner_flow = ntohl(get_16aligned_be32(&inner_ip6->ip6_flow));

        if ((inner_flow >> 28) == 6) {
            label = htonl(inner_flow & IPV6_LABEL_MASK);
        }
    } else if (mode == SRV6_FLOWLABEL_COMPUTE) {
        label = htonl(dp_packet_get_rss_hash(packet) & IPV6_LABEL_MASK);
    }

    netdev_tnl_push_ip_header(packet, data->header,
                              data->header_len, &ip_tot_size, label);
}

/* Decapsulates an SRv6 packet.
 *
 * Verifies that the IPv6 extension headers of 'packet' contain a routing
 * header of the SRv6 type with no segments left, sets the packet type from
 * the routing header's next-header field (IPv4 or IPv6), extracts the outer
 * IP tunnel metadata and strips the outer headers.
 *
 * The L3 pointer and the available L3 size are validated before the IPv6
 * header is read, so a packet without an L3 header or with fewer than
 * IPV6_HEADER_LEN bytes of L3 data is dropped instead of being dereferenced.
 *
 * Returns 'packet' on success.  On any failure 'packet' is deleted and NULL
 * is returned. */
struct dp_packet *
netdev_srv6_pop_header(struct dp_packet *packet)
{
    const struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    const struct ip6_rt_hdr *rt_hdr;
    uint8_t nw_proto;
    const void *data;
    uint8_t nw_frag = 0;
    unsigned int hlen;
    size_t size;

    /*
     * Verifies that the routing header is present in the IPv6
     * extension headers and that its type is SRv6.
     */
    size = dp_packet_l3_size(packet);
    if (!nh || size < IPV6_HEADER_LEN) {
        goto err;
    }
    size -= IPV6_HEADER_LEN;

    /* Safe to look inside the fixed IPv6 header now. */
    nw_proto = nh->ip6_nxt;
    data = nh + 1;

    if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
                             NULL, &rt_hdr)) {
        goto err;
    }

    if (!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4) {
        goto err;
    }

    if (rt_hdr->segments_left > 0) {
        VLOG_WARN_RL(&err_rl, "invalid srv6 segments_left=%d\n",
                     rt_hdr->segments_left);
        goto err;
    }

    if (rt_hdr->nexthdr == IPPROTO_IPIP) {
        packet->packet_type = htonl(PT_IPV4);
    } else if (rt_hdr->nexthdr == IPPROTO_IPV6) {
        packet->packet_type = htonl(PT_IPV6);
    } else {
        goto err;
    }

    pkt_metadata_init_tnl(md);
    if (!ip_extract_tnl_md(packet, tnl, &hlen)) {
        goto err;
    }

    tnl_ol_pop(packet, hlen);

    return packet;
err:
    dp_packet_delete(packet);
    return NULL;
}

struct dp_packet *
netdev_vxlan_pop_header(struct dp_packet *packet)
{
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    struct vxlanhdr *vxh;
    unsigned int hlen;
    ovs_be32 vx_flags;
    enum packet_type next_pt = PT_ETH;

    ovs_assert(packet->l3_ofs > 0);
    ovs_assert(packet->l4_ofs > 0);

    pkt_metadata_init_tnl(md);
    if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
        goto err;
    }

    vxh = udp_extract_tnl_md(packet, tnl, &hlen);
    if (!vxh) {
        goto err;
    }

    vx_flags = get_16aligned_be32(&vxh->vx_flags);
    if (vx_flags & htonl(VXLAN_HF_GPE)) {
        vx_flags &= htonl(~VXLAN_GPE_USED_BITS);
        /* Drop the OAM packets */
        if (vxh->vx_gpe.flags & VXLAN_GPE_FLAGS_O) {
            goto err;
        }
        switch (vxh->vx_gpe.next_protocol) {
        case VXLAN_GPE_NP_IPV4:
            next_pt = PT_IPV4;
            break;
        case VXLAN_GPE_NP_IPV6:
            next_pt = PT_IPV6;
            break;
        case VXLAN_GPE_NP_NSH:
            next_pt = PT_NSH;
            break;
        case VXLAN_GPE_NP_ETHERNET:
            next_pt = PT_ETH;
            break;
        default:
            goto err;
        }
    }

    if (vx_flags != htonl(VXLAN_FLAGS) ||
       (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
        VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
                     ntohl(vx_flags),
                     ntohl(get_16aligned_be32(&vxh->vx_vni)));
        goto err;
    }
    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
    tnl->flags |= FLOW_TNL_F_KEY;

    packet->packet_type = htonl(next_pt);
    tnl_ol_pop(packet, hlen + VXLAN_HLEN);
    if (next_pt != PT_ETH) {
        packet->l3_ofs = 0;
    }

    return packet;
err:
    dp_packet_delete(packet);
    return NULL;
}

int
netdev_vxlan_build_header(const struct netdev *netdev,
                          struct ovs_action_push_tnl *data,
                          const struct netdev_tnl_build_header_params *params)
{
    const struct netdev_tunnel_config *tnl_cfg;
    struct vxlanhdr *vxh;

    tnl_cfg = netdev_get_tunnel_config(netdev);

    vxh = udp_build_header(tnl_cfg, data, params);

    if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
        put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS | VXLAN_HF_GPE));
        put_16aligned_be32(&vxh->vx_vni,
                           htonl(ntohll(params->flow->tunnel.tun_id) << 8));
        if (params->flow->packet_type == htonl(PT_ETH)) {
            vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_ETHERNET;
        } else if (pt_ns(params->flow->packet_type) == OFPHTN_ETHERTYPE) {
            switch (pt_ns_type(params->flow->packet_type)) {
            case ETH_TYPE_IP:
                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_IPV4;
                break;
            case ETH_TYPE_IPV6:
                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_IPV6;
                break;
            case ETH_TYPE_NSH:
                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_NSH;
                break;
            case ETH_TYPE_TEB:
                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_ETHERNET;
                break;
            default:
                return EINVAL;
            }
        } else {
            return EINVAL;
        }
    } else {
        put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
        put_16aligned_be32(&vxh->vx_vni,
                           htonl(ntohll(params->flow->tunnel.tun_id) << 8));
    }

    data->header_len += sizeof *vxh;
    data->tnl_type = OVS_VPORT_TYPE_VXLAN;
    return 0;
}

/* Decapsulates a Geneve packet.
 *
 * The outer UDP/IP headers are turned into tunnel metadata, then the Geneve
 * header is validated: the packet must be long enough for the base header
 * plus the advertised options, the version must be 0, and the payload must
 * be Ethernet (ETH_TYPE_TEB).  The VNI becomes the tunnel key, the raw
 * options are copied into the tunnel metadata, and the outer headers are
 * popped.
 *
 * Returns 'packet' on success.  On any failure the packet is deleted and NULL
 * is returned. */
struct dp_packet *
netdev_geneve_pop_header(struct dp_packet *packet)
{
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    unsigned int outer_len, opts_bytes, total_len;
    struct genevehdr *gnh;

    pkt_metadata_init_tnl(md);
    if (dp_packet_l4_size(packet) < GENEVE_BASE_HLEN) {
        VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%"PRIuSIZE"\n",
                     (unsigned int)GENEVE_BASE_HLEN, dp_packet_l4_size(packet));
        goto err;
    }

    gnh = udp_extract_tnl_md(packet, tnl, &outer_len);
    if (!gnh) {
        goto err;
    }

    /* The header expresses the options length in 4-byte units. */
    opts_bytes = gnh->opt_len * 4;
    total_len = outer_len + GENEVE_BASE_HLEN + opts_bytes;
    if (dp_packet_size(packet) < total_len) {
        VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
                     total_len, dp_packet_size(packet));
        goto err;
    }

    if (gnh->ver != 0) {
        VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
        goto err;
    }

    if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
        VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
                     ntohs(gnh->proto_type));
        goto err;
    }

    if (gnh->oam) {
        tnl->flags |= FLOW_TNL_F_OAM;
    }
    tnl->flags |= FLOW_TNL_F_KEY | FLOW_TNL_F_UDPIF;
    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);

    /* Hand the options over exactly as they appear in the header. */
    memcpy(tnl->metadata.opts.gnv, gnh->options, opts_bytes);
    tnl->metadata.present.len = opts_bytes;

    packet->packet_type = htonl(PT_ETH);
    tnl_ol_pop(packet, total_len);

    return packet;

err:
    dp_packet_delete(packet);
    return NULL;
}

/* Builds the Geneve header for the tunnel push action described by 'data'.
 *
 * The outer UDP header is produced by udp_build_header(); this function then
 * fills in the VNI (from the flow's tunnel ID), serializes the flow's tunnel
 * metadata into Geneve options, and sets the option length, OAM and critical
 * bits.  The payload protocol is always Ethernet (ETH_TYPE_TEB).
 *
 * Always returns 0. */
int
netdev_geneve_build_header(const struct netdev *netdev,
                           struct ovs_action_push_tnl *data,
                           const struct netdev_tnl_build_header_params *params)
{
    struct genevehdr *gnh;
    bool has_critical;
    int opts_bytes;

    gnh = udp_build_header(netdev_get_tunnel_config(netdev), data, params);

    put_16aligned_be32(&gnh->vni,
                       htonl(ntohll(params->flow->tunnel.tun_id) << 8));

    opts_bytes = tun_metadata_to_geneve_header(&params->flow->tunnel,
                                               gnh->options, &has_critical);

    gnh->proto_type = htons(ETH_TYPE_TEB);
    gnh->critical = has_critical;
    gnh->oam = (params->flow->tunnel.flags & FLOW_TNL_F_OAM) != 0;
    /* The header expresses the options length in 4-byte units. */
    gnh->opt_len = opts_bytes / 4;

    data->tnl_type = OVS_VPORT_TYPE_GENEVE;
    data->header_len += sizeof *gnh + opts_bytes;
    return 0;
}


/* Parses 's' as a decimal UDP port number in the range 1...UINT16_MAX.
 *
 * Returns true and stores the value in '*port' on success.  Returns false,
 * leaving '*port' untouched, if 's' contains no digits, has trailing
 * characters after the number, or is out of range. */
static bool
tnl_parse_udp_port(const char *s, uint16_t *port)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(s, &end, 10);
    if (end == s || *end != '\0' || errno == ERANGE
        || value <= 0 || value > UINT16_MAX) {
        return false;
    }

    *port = value;
    return true;
}

/* unixctl handler that shows or sets the UDP source port range used for
 * tunnel egress.
 *
 * With fewer than two arguments it replies with the current range.  With
 * exactly two arguments it parses them as ports, stores the smaller one in
 * 'tnl_udp_port_min' and the larger one in 'tnl_udp_port_max', changes
 * 'tnl_conf_seq' and replies "OK".  Every path sends a reply on 'conn'. */
void
netdev_tnl_egress_port_range(struct unixctl_conn *conn, int argc,
                             const char *argv[], void *aux OVS_UNUSED)
{
    uint16_t port1, port2;

    if (argc < 3) {
        struct ds ds = DS_EMPTY_INITIALIZER;

        ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
                            tnl_udp_port_min, tnl_udp_port_max);

        unixctl_command_reply(conn, ds_cstr(&ds));
        ds_destroy(&ds);
        return;
    }

    if (argc != 3) {
        /* Reply instead of returning silently, so that the client is not
         * left waiting.  Uses the same reply call as the other invalid-input
         * cases in this function. */
        unixctl_command_reply(conn, "Invalid number of arguments.");
        return;
    }

    if (!tnl_parse_udp_port(argv[1], &port1)) {
        unixctl_command_reply(conn, "Invalid min.");
        return;
    }
    if (!tnl_parse_udp_port(argv[2], &port2)) {
        unixctl_command_reply(conn, "Invalid max.");
        return;
    }

    /* Accept the bounds in either order. */
    if (port1 > port2) {
        tnl_udp_port_min = port2;
        tnl_udp_port_max = port1;
    } else {
        tnl_udp_port_min = port1;
        tnl_udp_port_max = port2;
    }
    seq_change(tnl_conf_seq);

    unixctl_command_reply(conn, "OK");
}
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								/*
 								 * Copyright (c) 2016 Nicira, Inc.
-												netdev-vport: Update copyright headers

Red Hat has contributed to the original code that has moved to netdev-native-tnl
module and to code that has been kept in netdev-vport as well.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:49 -03:00
+								 * Copyright (c) 2016 Red Hat, Inc.
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								#include <config.h>
-												netdev-vport: remove unneeded headers

Throughout the years, changes in netdev vport have removed the need for some of
the headers, like shash, hmap, and many others. With the recent split of
push/pop code, less headers are needed in each of the two modules.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:47 -03:00
+								#include "netdev-native-tnl.h"
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#include <errno.h>
 								#include <fcntl.h>
 								#include <sys/socket.h>
 								#include <net/if.h>
-												sparse: Add guards to prevent FreeBSD-incompatible #include order.

FreeBSD insists that <sys/types.h> be included before <netinet/in.h> and
that <netinet/in.h> be included before <arpa/inet.h>.  This adds guards to
the "sparse" headers to yield a warning if this order is violated.  This
commit also adjusts the order of many #includes to suit this requirement.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2017-11-06 14:42:32 -08:00
+								#include <sys/types.h>
-												netdev-native-tnl: Fix a build error on NetBSD 7.0

netinet/ip6.h is not a standalone header there.

Signed-off-by: YAMAMOTO Takashi <yamamoto@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
Tested-by: Jeff Feng <jianhua@us.ibm.com>

											
										
										
											2016-05-20 05:52:19 +00:00
+								#include <netinet/in.h>
-												netdev-vport: remove unneeded headers

Throughout the years, changes in netdev vport have removed the need for some of
the headers, like shash, hmap, and many others. With the recent split of
push/pop code, less headers are needed in each of the two modules.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:47 -03:00
+								#include <netinet/ip.h>
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#include <netinet/ip6.h>
 								#include <sys/ioctl.h>
 								#include <stdlib.h>
 								#include <sys/time.h>
 								#include "byte-order.h"
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								#include "coverage.h"
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#include "csum.h"
 								#include "dp-packet.h"
-												netdev-vport: remove unneeded headers

Throughout the years, changes in netdev vport have removed the need for some of
the headers, like shash, hmap, and many others. With the recent split of
push/pop code, less headers are needed in each of the two modules.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:47 -03:00
+								#include "netdev.h"
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#include "netdev-vport.h"
 								#include "netdev-vport-private.h"
 								#include "odp-netlink.h"
 								#include "packets.h"
-												netdev-vport: remove unneeded headers

Throughout the years, changes in netdev vport have removed the need for some of
the headers, like shash, hmap, and many others. With the recent split of
push/pop code, less headers are needed in each of the two modules.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:47 -03:00
+								#include "seq.h"
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#include "unaligned.h"
 								#include "unixctl.h"
-												lib: Add non-null assertions to some return values of `dp_packet_data`.

This commit adds some `ovs_assert()` checks to some return values of
`dp_packet_data()` to ensure that they are not NULL and to prevent
null-pointer dereferences, which might lead to unwanted crashes. We use
assertions since it should be impossible for these calls to
`dp_packet_data()` to return NULL.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: James Raphael Tiovalen <jamestiotio@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-09-03 23:21:54 +08:00
+								#include "util.h"
-												netdev-vport: remove unneeded headers

Throughout the years, changes in netdev vport have removed the need for some of
the headers, like shash, hmap, and many others. With the recent split of
push/pop code, less headers are needed in each of the two modules.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-06-02 07:18:47 -03:00
+								#include "openvswitch/vlog.h"
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
 								VLOG_DEFINE_THIS_MODULE(native_tnl);
 								static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								COVERAGE_DEFINE(native_tnl_l3csum_checked);
 								COVERAGE_DEFINE(native_tnl_l3csum_err);
 								COVERAGE_DEFINE(native_tnl_l4csum_checked);
 								COVERAGE_DEFINE(native_tnl_l4csum_err);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								#define VXLAN_HLEN   (sizeof(struct udp_header) +         \
 								                      sizeof(struct vxlanhdr))
 								#define GENEVE_BASE_HLEN   (sizeof(struct udp_header) +         \
 								                            sizeof(struct genevehdr))
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								#define GTPU_HLEN   (sizeof(struct udp_header) +        \
 								                     sizeof(struct gtpuhdr))
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								uint16_t tnl_udp_port_min = 32768;
 								uint16_t tnl_udp_port_max = 61000;
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								ovs_be16
 								netdev_tnl_get_src_port(struct dp_packet *packet)
 								{
 								    uint32_t hash;
 								    if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
 								        hash = dp_packet_get_rss_hash(packet);
 								    } else {
 								        struct flow flow;
 								        flow_extract(packet, &flow);
 								        hash = flow_hash_5tuple(&flow, 0);
 								        dp_packet_set_rss_hash(packet, hash);
 								    }
 								    hash = ((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32;
 								    return htons(hash + tnl_udp_port_min);
 								}
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								static void *
 								ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								                  unsigned int *hlen)
 								{
 								    void *nh;
 								    struct ip_header *ip;
 								    struct ovs_16aligned_ip6_hdr *ip6;
 								    void *l4;
 								    int l3_size;
 								    nh = dp_packet_l3(packet);
 								    ip = nh;
 								    ip6 = nh;
 								    l4 = dp_packet_l4(packet);
 								    if (!nh || !l4) {
 								        return NULL;
 								    }
 								    *hlen = sizeof(struct eth_header);
 								    l3_size = dp_packet_size(packet) -
 								              ((char *)nh - (char *)dp_packet_data(packet));
 								    if (IP_VER(ip->ip_ihl_ver) == 4) {
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								        bool bad_csum = dp_packet_ip_checksum_bad(packet);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        ovs_be32 ip_src, ip_dst;
-												userspace: Enable IP checksum offloading by default.

The netdev receiving packets is supposed to provide the flags
indicating if the IP checksum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP checksum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP checksum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-14 15:03:26 -04:00
+								        /* A packet coming from a network device might have the
 								         * csum already checked. In this case, skip the check. */
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								        if (OVS_UNLIKELY(!bad_csum && dp_packet_ip_checksum_unknown(packet))) {
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								            COVERAGE_INC(native_tnl_l3csum_checked);
-												dp-packet: Resolve unknown checksums.

Now that IP and L4 checksum offloading don't require tweaking Tx flags,
update checksum status in parts of OVS that validate checksums (in case
of unknown status).

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:59 +02:00
+								            if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
 								                dp_packet_ip_checksum_set_bad(packet);
 								                bad_csum = true;
 								            } else {
 								                dp_packet_ip_checksum_set_good(packet);
 								            }
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								        }
 								        if (OVS_UNLIKELY(bad_csum)) {
 								            COVERAGE_INC(native_tnl_l3csum_err);
 								            VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
 								            return NULL;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        }
 								        if (ntohs(ip->ip_tot_len) > l3_size) {
 								            VLOG_WARN_RL(&err_rl, "ip packet is truncated (IP length %d, actual %d)",
 								                         ntohs(ip->ip_tot_len), l3_size);
 								            return NULL;
 								        }
 								        if (IP_IHL(ip->ip_ihl_ver) * 4 > sizeof(struct ip_header)) {
 								            VLOG_WARN_RL(&err_rl, "ip options not supported on tunnel packets "
 								                         "(%d bytes)", IP_IHL(ip->ip_ihl_ver) * 4);
 								            return NULL;
 								        }
 								        ip_src = get_16aligned_be32(&ip->ip_src);
 								        ip_dst = get_16aligned_be32(&ip->ip_dst);
 								        tnl->ip_src = ip_src;
 								        tnl->ip_dst = ip_dst;
 								        tnl->ip_tos = ip->ip_tos;
 								        tnl->ip_ttl = ip->ip_ttl;
 								        *hlen += IP_HEADER_LEN;
 								    } else if (IP_VER(ip->ip_ihl_ver) == 6) {
-												netdev-native-tnl: Fix IPv6 tos bits handling.

IPv6 tunnels ignore the outer tos bits on receive and do not
set them on xmit. The following patch fixes it.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								        ovs_be32 tc_flow = get_16aligned_be32(&ip6->ip6_flow);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
 								        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
 								        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
-												netdev-native-tnl: Fix IPv6 tos bits handling.

IPv6 tunnels ignore the outer tos bits on receive and do not
set them on xmit. The following patch fixes it.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
 								        tnl->ip_tos = ntohl(tc_flow) >> 20;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        tnl->ip_ttl = ip6->ip6_hlim;
-												userspace: return correct ipv6 header len.

The ipv6 header len might have extension header, but current
code simply returns fixed ipv6 header length 40-byte.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-03-09 13:02:22 -08:00
+								        *hlen += packet->l4_ofs - packet->l3_ofs;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
 								    } else {
 								        VLOG_WARN_RL(&err_rl, "ipv4 packet has invalid version (%d)",
 								                     IP_VER(ip->ip_ihl_ver));
 								        return NULL;
 								    }
 								    return l4;
 								}
 								/* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
 								 * reallocating the packet if necessary.  'header' should contain an Ethernet
 								 * header, followed by an IPv4 header (without options), and an L4 header.
 								 *
 								 * This function sets the IP header's ip_tot_len field (which should be zeroed
 								 * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
-												userspace: Enable IP checksum offloading by default.

The netdev receiving packets is supposed to provide the flags
indicating if the IP checksum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP checksum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP checksum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-14 15:03:26 -04:00
+								 * updates IP header checksum if not offloaded, as well as the l3 and l4
 								 * offsets in the 'packet'.
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								 *
 								 * Return pointer to the L4 header added to 'packet'. */
 								void *
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
 								                          int size, int *ip_tot_size, ovs_be32 ipv6_label)
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								{
 								    struct eth_header *eth;
 								    struct ip_header *ip;
 								    struct ovs_16aligned_ip6_hdr *ip6;
 								    eth = dp_packet_push_uninit(packet, size);
 								    *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
 								    memcpy(eth, header, size);
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								    /* The encapsulated packet has type Ethernet. Adjust dp_packet. */
 								    packet->packet_type = htonl(PT_ETH);
 								    dp_packet_reset_offsets(packet);
 								    packet->l3_ofs = sizeof (struct eth_header);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
 								    if (netdev_tnl_is_header_ipv6(header)) {
 								        ip6 = netdev_tnl_ipv6_hdr(eth);
 								        *ip_tot_size -= IPV6_HEADER_LEN;
 								        ip6->ip6_plen = htons(*ip_tot_size);
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								        packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								        dp_packet_ip_checksum_set_unknown(packet);
-												tunneling: Calculate and update packet l4_offset in tunnel push.

The following tunnel combine patch series avoids the packets recirculation
after the tunnel push. So it is necessary to populate all relevant packet meta
data fields for the following combined action-set.

Consider a chained tunnel test case shown below,

PKT-IN --> TUNNEL_PUSH --> MOD_PKT_HDR --> TUNNEL_POP

In this eg: the last tunnel_pop operation uses the l4_offset in the packet to
validate the packets. So it must be calculated and updated in the packet before
executing the action. Since there is no recirculation now on, this calculation
is doing as part of tunnel_push.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2017-07-19 14:46:02 +01:00
+								        packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
-												userspace: Support VXLAN and GENEVE TSO.

For the userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only userspace vxlan and geneve tunnels are supported, along with
tunnel outer and inner csum offload. If the netdev does not support the offload
features, there is a software fallback. If the netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can also turn off
the offload features with ethtool.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        return ip6 + 1;
 								    } else {
 								        ip = netdev_tnl_ip_hdr(eth);
 								        ip->ip_tot_len = htons(*ip_tot_size);
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								        *ip_tot_size -= IP_HEADER_LEN;
-												userspace: Enable IP checksum offloading by default.

The netdev receiving packets is supposed to provide the flags
indicating if the IP checksum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP checksum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP checksum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-14 15:03:26 -04:00
+								        /* Postpone checksum to when the packet is pushed to the port. */
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								        dp_packet_ip_checksum_set_partial(packet);
-												userspace: Support VXLAN and GENEVE TSO.

For the userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only userspace vxlan and geneve tunnels are supported, along with
tunnel outer and inner csum offload. If the netdev does not support the offload
features, there is a software fallback. If the netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can also turn off
the offload features with ethtool.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
-												tunneling: Calculate and update packet l4_offset in tunnel push.

The following tunnel combine patch series avoids the packets recirculation
after the tunnel push. So it is necessary to populate all relevant packet meta
data fields for the following combined action-set.

Consider a chained tunnel test case shown below,

PKT-IN --> TUNNEL_PUSH --> MOD_PKT_HDR --> TUNNEL_POP

In this eg: the last tunnel_pop operation uses the l4_offset in the packet to
validate the packets. So it must be calculated and updated in the packet before
executing the action. Since there is no recirculation now on, this calculation
is doing as part of tunnel_push.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2017-07-19 14:46:02 +01:00
+								        packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        return ip + 1;
 								    }
 								}
 								static void *
 								udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 								                   unsigned int *hlen)
 								{
 								    struct udp_header *udp;
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								    udp = ip_extract_tnl_md(packet, tnl, hlen);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    if (!udp) {
 								        return NULL;
 								    }
 								    if (udp->udp_csum) {
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								        bool bad_csum = dp_packet_l4_checksum_bad(packet);
-												dp-packet: Rework L4 checksum offloads.

The DPDK mbuf API specifies 4 status when it comes to L4 checksums:
- RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- RTE_MBUF_F_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- RTE_MBUF_F_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- RTE_MBUF_F_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
  data, but the integrity of the L4 data is verified.

Similarly to the IP checksum offloads API, revise OVS L4 offloads API.

No information about the L4 protocol is provided by any netdev-*
implementation, so OVS needs to mark this L4 protocol during flow
extraction.

Rename current API for consistency with dp_packet_(inner_)?l4_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:58 +02:00
+								        if (OVS_UNLIKELY(!bad_csum && dp_packet_l4_checksum_unknown(packet))) {
-												netdev-dpdk: Enable Rx checksum offloading feature on DPDK physical ports.

Add Rx checksum offloading feature support on DPDK physical ports. By default,
the Rx checksum offloading is enabled if NIC supports. However,
the checksum offloading can be turned OFF either while adding a new DPDK
physical port to OVS or at runtime.

The rx checksum offloading can be turned off by setting the parameter to
'false'. For eg: To disable the rx checksum offloading when adding a port,

     'ovs-vsctl add-port br0 dpdk0 -- \
      set Interface dpdk0 type=dpdk options:rx-checksum-offload=false'

OR (to disable at run time after port is being added to OVS)

    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=false'

Similarly to turn ON rx checksum offloading at run time,
    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=true'

The Tx checksum offloading support is not implemented due to the following
reasons.

1) Checksum offloading and vectorization are mutually exclusive in DPDK poll
mode driver. Vector packet processing is turned OFF when checksum offloading
is enabled which causes significant performance drop at Tx side.

2) Normally, OVS generates checksum for tunnel packets in software at the
'tunnel push' operation, where the tunnel headers are created. However
enabling Tx checksum offloading involves,

*) Mark every packets for tx checksum offloading at 'tunnel_push' and
recirculate.
*) At the time of xmit, validate the same flag and instruct the NIC to do the
checksum calculation.  In case NIC doesn't support Tx checksum offloading,
the checksum calculation has to be done in software before sending out the
packets.

No significant performance improvement noticed with Tx checksum offloading
due to the overhead of additional validations + non vector packet processing.
In some test scenarios, it introduces performance drop too.

Rx checksum offloading still offers 8-9% of improvement on VxLAN tunneling
decapsulation even though the SSE vector Rx function is disabled in DPDK poll
mode driver.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Acked-by: Jesse Gross <jesse@kernel.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

											
										
										
											2017-01-02 14:27:48 -08:00
+								            uint32_t csum;
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								            COVERAGE_INC(native_tnl_l4csum_checked);
-												netdev-dpdk: Enable Rx checksum offloading feature on DPDK physical ports.

Add Rx checksum offloading feature support on DPDK physical ports. By default,
the Rx checksum offloading is enabled if NIC supports. However,
the checksum offloading can be turned OFF either while adding a new DPDK
physical port to OVS or at runtime.

The rx checksum offloading can be turned off by setting the parameter to
'false'. For eg: To disable the rx checksum offloading when adding a port,

     'ovs-vsctl add-port br0 dpdk0 -- \
      set Interface dpdk0 type=dpdk options:rx-checksum-offload=false'

OR (to disable at run time after port is being added to OVS)

    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=false'

Similarly to turn ON rx checksum offloading at run time,
    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=true'

The Tx checksum offloading support is not implemented due to the following
reasons.

1) Checksum offloading and vectorization are mutually exclusive in DPDK poll
mode driver. Vector packet processing is turned OFF when checksum offloading
is enabled which causes significant performance drop at Tx side.

2) Normally, OVS generates checksum for tunnel packets in software at the
'tunnel push' operation, where the tunnel headers are created. However
enabling Tx checksum offloading involves,

*) Mark every packets for tx checksum offloading at 'tunnel_push' and
recirculate.
*) At the time of xmit, validate the same flag and instruct the NIC to do the
checksum calculation.  In case NIC doesn't support Tx checksum offloading,
the checksum calculation has to be done in software before sending out the
packets.

No significant performance improvement noticed with Tx checksum offloading
due to the overhead of additional validations + non vector packet processing.
In some test scenarios, it introduces performance drop too.

Rx checksum offloading still offers 8-9% of improvement on VxLAN tunneling
decapsulation even though the SSE vector Rx function is disabled in DPDK poll
mode driver.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Acked-by: Jesse Gross <jesse@kernel.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

											
										
										
											2017-01-02 14:27:48 -08:00
+								            if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
 								                csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
 								            } else {
 								                csum = packet_csum_pseudoheader(dp_packet_l3(packet));
 								            }
 								            csum = csum_continue(csum, udp, dp_packet_size(packet) -
 								                                 ((const unsigned char *)udp -
-												userspace: Add packet_type in dp_packet and flow

This commit adds a packet_type attribute to the structs dp_packet and flow
to explicitly carry the type of the packet as preparation for the
introduction of the so-called packet type-aware pipeline (PTAP) in OVS.

The packet_type is a big-endian 32 bit integer with the encoding as
specified in OpenFlow version 1.5.

The upper 16 bits contain the packet type name space. Pre-defined values
are defined in openflow-common.h:

enum ofp_header_type_namespaces {
    OFPHTN_ONF = 0,             /* ONF namespace. */
    OFPHTN_ETHERTYPE = 1,       /* ns_type is an Ethertype. */
    OFPHTN_IP_PROTO = 2,        /* ns_type is a IP protocol number. */
    OFPHTN_UDP_TCP_PORT = 3,    /* ns_type is a TCP or UDP port. */
    OFPHTN_IPV4_OPTION = 4,     /* ns_type is an IPv4 option number. */
};

The lower 16 bits specify the actual type in the context of the name space.

Only name spaces 0 and 1 will be supported for now.

For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet).
This is the default packet_type in OVS and the only one supported so far.
Packets of type (OFPHTN_ONF, 0) are called Ethernet packets.

In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet.
A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet
with the Ethernet header (and any VLAN tags) removed to expose the L3
(or L2.5) payload of the packet. These will simply be called L3 packets.

The Ethernet address fields dl_src and dl_dst in struct flow are not
applicable for an L3 packet and must be zero. However, to maintain
compatibility with the large code base, we have chosen to copy the
Ethertype of an L3 packet into the dl_type field of struct flow.

This does not mean that it will be possible to match on dl_type for L3
packets with PTAP later on. Matching must be done on packet_type instead.

New dp_packets are initialized with packet_type Ethernet. Ports that
receive L3 packets will have to explicitly adjust the packet_type.

Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-25 16:29:59 +00:00
+								                                  (const unsigned char *)dp_packet_eth(packet)
-												netdev-dpdk: Enable Rx checksum offloading feature on DPDK physical ports.

Add Rx checksum offloading feature support on DPDK physical ports. By default,
the Rx checksum offloading is enabled if NIC supports. However,
the checksum offloading can be turned OFF either while adding a new DPDK
physical port to OVS or at runtime.

The rx checksum offloading can be turned off by setting the parameter to
'false'. For eg: To disable the rx checksum offloading when adding a port,

     'ovs-vsctl add-port br0 dpdk0 -- \
      set Interface dpdk0 type=dpdk options:rx-checksum-offload=false'

OR (to disable at run time after port is being added to OVS)

    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=false'

Similarly to turn ON rx checksum offloading at run time,
    'ovs-vsctl set Interface dpdk0 options:rx-checksum-offload=true'

The Tx checksum offloading support is not implemented due to the following
reasons.

1) Checksum offloading and vectorization are mutually exclusive in DPDK poll
mode driver. Vector packet processing is turned OFF when checksum offloading
is enabled which causes significant performance drop at Tx side.

2) Normally, OVS generates checksum for tunnel packets in software at the
'tunnel push' operation, where the tunnel headers are created. However
enabling Tx checksum offloading involves,

*) Mark every packet for tx checksum offloading at 'tunnel_push' and
recirculate.
*) At the time of xmit, validate the same flag and instruct the NIC to do the
checksum calculation.  In case NIC doesn't support Tx checksum offloading,
the checksum calculation has to be done in software before sending out the
packets.

No significant performance improvement noticed with Tx checksum offloading
due to the overhead of additional validations + non vector packet processing.
In some test scenarios, it introduces performance drop too.

Rx checksum offloading still offers 8-9% of improvement on VxLAN tunneling
decapsulation even though the SSE vector Rx function is disabled in DPDK poll
mode driver.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Acked-by: Jesse Gross <jesse@kernel.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

											
										
										
											2017-01-02 14:27:48 -08:00
+								                                 ));
-												dp-packet: Resolve unknown checksums.

Now that IP and L4 checksum offloading don't require tweaking Tx flags,
update checksum status in parts of OVS that validate checksums (in case
of unknown status).

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:59 +02:00
+								            if (csum_finish(csum)) {
 								                dp_packet_l4_checksum_set_bad(packet);
 								                bad_csum = true;
 								            } else {
 								                dp_packet_l4_checksum_set_good(packet);
 								            }
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								        }
 								        if (OVS_UNLIKELY(bad_csum)) {
 								            COVERAGE_INC(native_tnl_l4csum_err);
 								            return NULL;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        }
 								        tnl->flags |= FLOW_TNL_F_CSUM;
 								    }
 								    tnl->tp_src = udp->udp_src;
 								    tnl->tp_dst = udp->udp_dst;
 								    return udp + 1;
 								}
-												userspace: Support VXLAN and GENEVE TSO.

For userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only support userspace vxlan or geneve tunnel, meanwhile support
tunnel outer and inner csum offload. If netdev does not support offload
features, there is a software fallback. If netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can close offload
features by ethtool also.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
+								static void
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								tnl_ol_push(struct dp_packet *packet,
 								            const struct ovs_action_push_tnl *data)
-												userspace: Support VXLAN and GENEVE TSO.

For userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only support userspace vxlan or geneve tunnel, meanwhile support
tunnel outer and inner csum offload. If netdev does not support offload
features, there is a software fallback. If netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can close offload
features by ethtool also.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
+								{
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    packet->offloads <<= DP_PACKET_OL_SHIFT_COUNT;
-												userspace: Support VXLAN and GENEVE TSO.

For userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only support userspace vxlan or geneve tunnel, meanwhile support
tunnel outer and inner csum offload. If netdev does not support offload
features, there is a software fallback. If netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can close offload
features by ethtool also.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
-												netdev-dpdk: Refactor tunnel checksum offloading.

All information required for checksum offloading can be deduced by
already tracked dp_packet l3_ofs, l4_ofs, inner_l3_ofs and inner_l4_ofs
fields.
Remove DPDK specific l[2-4]_len from generic OVS code.

netdev-dpdk code then fills mbuf specifics step by step:
- outer_l2_len and outer_l3_len are needed for tunneling (and below
  features),
- l2_len and l3_len are needed for IP and L4 checksum (and below features),
- l4_len and tso_segsz are needed when doing TSO,

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Kevin Traynor <ktraynor@redhat.com>

											
										
										
											2024-05-30 15:10:14 +02:00
+								    if (data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
-												dp-packet: Rework tunnel offloads.

Rather than set bits in the mbuf ol_flags field, that only makes sense
for netdev-dpdk ports, mark packet for tunnel offload in OVS offloads
API.

While at it, since there is nothing really "hardware" related, rename
current API for consistency with dp_packet_tunnel_ prefix.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:56 +02:00
+								        dp_packet_tunnel_set_geneve(packet);
-												netdev-dpdk: Refactor tunnel checksum offloading.

All information required for checksum offloading can be deduced by
already tracked dp_packet l3_ofs, l4_ofs, inner_l3_ofs and inner_l4_ofs
fields.
Remove DPDK specific l[2-4]_len from generic OVS code.

netdev-dpdk code then fills mbuf specifics step by step:
- outer_l2_len and outer_l3_len are needed for tunneling (and below
  features),
- l2_len and l3_len are needed for IP and L4 checksum (and below features),
- l4_len and tso_segsz are needed when doing TSO,

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Kevin Traynor <ktraynor@redhat.com>

											
										
										
											2024-05-30 15:10:14 +02:00
+								    } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
-												dp-packet: Rework tunnel offloads.

Rather than set bits in the mbuf ol_flags field, that only makes sense
for netdev-dpdk ports, mark packet for tunnel offload in OVS offloads
API.

While at it, since there is nothing really "hardware" related, rename
current API for consistency with dp_packet_tunnel_ prefix.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:56 +02:00
+								        dp_packet_tunnel_set_vxlan(packet);
-												userspace: Support GRE TSO.

This patch extends the userspace datapaths support of tunnel tso from
only supporting VxLAN and Geneve to also supporting GRE tunnels. There
is also a software fallback for cases where the egress netdev does not
support this feature.

Reviewed-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-01-16 00:21:31 -05:00
+								    } else if (data->tnl_type == OVS_VPORT_TYPE_GRE ||
 								               data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
-												dp-packet: Rework tunnel offloads.

Rather than set bits in the mbuf ol_flags field, that only makes sense
for netdev-dpdk ports, mark packet for tunnel offload in OVS offloads
API.

While at it, since there is nothing really "hardware" related, rename
current API for consistency with dp_packet_tunnel_ prefix.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:56 +02:00
+								        dp_packet_tunnel_set_gre(packet);
-												userspace: Support VXLAN and GENEVE TSO.

For userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only support userspace vxlan or geneve tunnel, meanwhile support
tunnel outer and inner csum offload. If netdev does not support offload
features, there is a software fallback. If netdev does not support vxlan
and geneve tso, packets will be dropped. Front-end devices can close offload
features by ethtool also.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
+								    }
 								}
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
 								/* Counterpart of tnl_ol_push() used when a tunnel header is removed.
 								 *
 								 * Shifts 'packet->offloads' right by DP_PACKET_OL_SHIFT_COUNT, i.e. the
 								 * reverse of the left shift that tnl_ol_push() applies, and then calls
 								 * dp_packet_reset_packet() with 'off'.
 								 *
 								 * NOTE(review): 'off' is presumably the number of bytes of outer
 								 * headers to strip from the front of 'packet' -- confirm against
 								 * dp_packet_reset_packet() and the callers. */
+								static void
 								tnl_ol_pop(struct dp_packet *packet, int off)
 								{
 								    packet->offloads >>= DP_PACKET_OL_SHIFT_COUNT;
 								    dp_packet_reset_packet(packet, off);
 								}
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								void
-												netdev-native-tnl: refactor the tunnel push header.

The patch adds additional 'struct netdev *' to the
native tunnel's push_header() interface.  This is used
for later GRE sequence number support.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-03-09 13:02:23 -08:00
+								netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
 								                           struct dp_packet *packet,
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								                           const struct ovs_action_push_tnl *data)
 								{
-												userspace: Enable tunnel tests with TSO.

This patch enables most of the tunnel tests in the testsuite, and adds a
large TCP transfer to a vxlan and geneve test to verify TSO
functionality. Some additional changes were required to accommodate these
changes with netdev-linux interfaces. The test for vlan over vxlan is
purposely not enabled as the traffic produced by this test gives
incorrect values in the vnet header.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:31 -05:00
+								    uint16_t l3_ofs = packet->l3_ofs;
 								    uint16_t l4_ofs = packet->l4_ofs;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    struct udp_header *udp;
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    ovs_be16 udp_src;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    int ip_tot_size;
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    /* We may need to re-calculate the hash and this has to be done before
 								     * modifying the packet. */
 								    udp_src = netdev_tnl_get_src_port(packet);
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_push(packet, data);
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								    udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
 								                                    &ip_tot_size, 0);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration is kept in
the header, since all the other functions are declared there, even if
there is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is the right thing to do anyway.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    udp->udp_src = udp_src;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    udp->udp_len = htons(ip_tot_size);
-												dp-packet: Rework L4 checksum offloads.

The DPDK mbuf API specifies 4 status when it comes to L4 checksums:
- RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- RTE_MBUF_F_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- RTE_MBUF_F_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- RTE_MBUF_F_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
  data, but the integrity of the L4 data is verified.

Similarly to the IP checksum offloads API, revise OVS L4 offloads API.

No information about the L4 protocol is provided by any netdev-*
implementation, so OVS needs to mark this L4 protocol during flow
extraction.

Rename current API for consistency with dp_packet_(inner_)?l4_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:58 +02:00
+								    dp_packet_l4_proto_set_udp(packet);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    if (udp->udp_csum) {
-												dp-packet: Rework L4 checksum offloads.

The DPDK mbuf API specifies 4 status when it comes to L4 checksums:
- RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- RTE_MBUF_F_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- RTE_MBUF_F_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- RTE_MBUF_F_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
  data, but the integrity of the L4 data is verified.

Similarly to the IP checksum offloads API, revise OVS L4 offloads API.

No information about the L4 protocol is provided by any netdev-*
implementation, so OVS needs to mark this L4 protocol during flow
extraction.

Rename current API for consistency with dp_packet_(inner_)?l4_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:58 +02:00
+								        dp_packet_l4_checksum_set_partial(packet);
-												dp-packet: Remove Linux specific L4 offloads.

As the virtio-net offload API is used for netdev-linux ports, but
provides no information about the potentially encapsulated protocol
concerned by a checksum request, specific information from this netdev-
specific implementation is propagated into OVS code, and must be
carefully evaluated when some tunnel gets decapsulated.

This induces a cost in "normal" processing path, while the netdev-linux
path is not performance critical.

This patch removes such specific information, yet try harder to parse
the packet on the Rx side and set offload flags accordingly for non
encapsulated traffic. For encapsulated traffic, the inner
checksum is computed.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:52 +02:00
+								    } else {
-												dp-packet: Rework L4 checksum offloads.

The DPDK mbuf API specifies 4 status when it comes to L4 checksums:
- RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- RTE_MBUF_F_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- RTE_MBUF_F_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- RTE_MBUF_F_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
  data, but the integrity of the L4 data is verified.

Similarly to the IP checksum offloads API, revise OVS L4 offloads API.

No information about the L4 protocol is provided by any netdev-*
implementation, so OVS needs to mark this L4 protocol during flow
extraction.

Rename current API for consistency with dp_packet_(inner_)?l4_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:58 +02:00
+								        dp_packet_l4_checksum_set_good(packet);
-												userspace: Enable tunnel tests with TSO.

This patch enables most of the tunnel tests in the testsuite, and adds a
large TCP transfer to a vxlan and geneve test to verify TSO
functionality. Some additional changes were required to accommodate these
changes with netdev-linux interfaces. The test for vlan over vxlan is
purposely not enabled as the traffic produced by this test gives
incorrect values in the vnet header.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:31 -05:00
+								    }
-												userspace: Support VXLAN and GENEVE TSO.

For the userspace datapath, this patch provides vxlan and geneve tunnel tso.
Only userspace vxlan and geneve tunnels are supported, together with
tunnel outer and inner csum offload. If the netdev does not support the
offload features, there is a software fallback. If the netdev does not
support vxlan and geneve tso, packets will be dropped. Front-end devices
can also disable the offload features with ethtool.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Dexia Li <dexia.li@jaguarmicro.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:30 -05:00
-												userspace: Enable tunnel tests with TSO.

This patch enables most of the tunnel tests in the testsuite, and adds a
large TCP transfer to a vxlan and geneve test to verify TSO
functionality. Some additional changes were required to accommodate these
changes with netdev-linux interfaces. The test for vlan over vxlan is
purposely not enabled as the traffic produced by this test gives
incorrect values in the vnet header.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-01-17 14:26:31 -05:00
+								    if (l3_ofs != UINT16_MAX) {
 								        packet->inner_l3_ofs = l3_ofs + data->header_len;
 								    }
 								    if (l4_ofs != UINT16_MAX) {
 								        packet->inner_l4_ofs = l4_ofs + data->header_len;
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								}
 								static void *
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								eth_build_header(struct ovs_action_push_tnl *data,
 								                 const struct netdev_tnl_build_header_params *params)
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								{
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    uint16_t eth_proto = params->is_ipv6 ? ETH_TYPE_IPV6 : ETH_TYPE_IP;
 								    struct eth_header *eth;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    memset(data->header, 0, sizeof data->header);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    eth = (struct eth_header *)data->header;
 								    eth->eth_dst = params->dmac;
 								    eth->eth_src = params->smac;
 								    eth->eth_type = htons(eth_proto);
 								    data->header_len = sizeof(struct eth_header);
 								    return eth + 1;
 								}
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								void *
 								netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
 								                           const struct netdev_tnl_build_header_params *params,
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_ip_build_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:22 +09:00
+								                           uint8_t next_proto, ovs_be32 ipv6_label)
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								{
 								    void *l3;
 								    l3 = eth_build_header(data, params);
 								    if (!params->is_ipv6) {
 								        ovs_be32 ip_src = in6_addr_get_mapped_ipv4(params->s_ip);
 								        struct ip_header *ip;
 								        ip = (struct ip_header *) l3;
 								        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
 								        ip->ip_tos = params->flow->tunnel.ip_tos;
 								        ip->ip_ttl = params->flow->tunnel.ip_ttl;
 								        ip->ip_proto = next_proto;
 								        put_16aligned_be32(&ip->ip_src, ip_src);
 								        put_16aligned_be32(&ip->ip_dst, params->flow->tunnel.ip_dst);
 								        ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
 								                          htons(IP_DF) : 0;
-												userspace: Enable IP checksum offloading by default.

The netdev receiving packets is supposed to provide the flags
indicating if the IP checksum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP checksum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP checksum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-14 15:03:26 -04:00
+								        /* The checksum will be calculated when the headers are pushed
 								         * to the packet if offloading is not enabled. */
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
 								        data->header_len += IP_HEADER_LEN;
 								        return ip + 1;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    } else {
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								        struct ovs_16aligned_ip6_hdr *ip6;
 								        ip6 = (struct ovs_16aligned_ip6_hdr *) l3;
-												netdev-native-tnl: Fix IPv6 tos bits handling.

IPv6 tunnels ignore outer tos bits on receive and do not
set them on xmit. The following patch fixes it.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								        put_16aligned_be32(&ip6->ip6_flow, htonl(6 << 28) |
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_ip_build_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:22 +09:00
+								                           htonl(params->flow->tunnel.ip_tos << 20) |
 								                           (ipv6_label & htonl(IPV6_LABEL_MASK)));
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								        ip6->ip6_hlim = params->flow->tunnel.ip_ttl;
 								        ip6->ip6_nxt = next_proto;
 								        memcpy(&ip6->ip6_src, params->s_ip, sizeof(ovs_be32[4]));
 								        memcpy(&ip6->ip6_dst, &params->flow->tunnel.ipv6_dst, sizeof(ovs_be32[4]));
 								        data->header_len += IPV6_HEADER_LEN;
 								        return ip6 + 1;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								}
 								static void *
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								udp_build_header(const struct netdev_tunnel_config *tnl_cfg,
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								                 struct ovs_action_push_tnl *data,
 								                 const struct netdev_tnl_build_header_params *params)
 								{
 								    struct udp_header *udp;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_ip_build_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:22 +09:00
+								    udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP, 0);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    udp->udp_dst = tnl_cfg->dst_port;
-												tunnel: Allow UDP zero checksum with IPv6 tunnels.

This patch adopts the proposed RFC 6935 by allowing null UDP checksums
even if the tunnel protocol is IPv6. This is already supported by Linux
through the udp6zerocsumtx tunnel option. It is disabled by default and
IPv6 tunnels are flagged as requiring a checksum, but this patch enables
the user to set csum=false on IPv6 tunnels.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-05 16:45:01 -04:00
+								    if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        /* Write a value in now to mark that we should compute the checksum
 								         * later. 0xffff is handy because it is transparent to the
 								         * calculation. */
 								        udp->udp_csum = htons(0xffff);
 								    }
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    data->header_len += sizeof *udp;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    return udp + 1;
 								}
 								/* Returns the GRE header length, in bytes, implied by 'flags' (compared
 								 * against htons()-converted flag constants): a base of 4 bytes plus
 								 * 4 more bytes for each of GRE_CSUM, GRE_KEY and GRE_SEQ that is set. */
 								static int
 								gre_header_len(ovs_be16 flags)
 								{
 								    int len = 4;
 								    len += (flags & htons(GRE_CSUM)) ? 4 : 0;
 								    len += (flags & htons(GRE_KEY)) ? 4 : 0;
 								    len += (flags & htons(GRE_SEQ)) ? 4 : 0;
 								    return len;
 								}
 								static int
 								parse_gre_header(struct dp_packet *packet,
 								                 struct flow_tnl *tnl)
 								{
 								    const struct gre_base_hdr *greh;
 								    ovs_16aligned_be32 *options;
 								    int hlen;
 								    unsigned int ulen;
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								    uint16_t greh_protocol;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
a IP header. Trust decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								    greh = ip_extract_tnl_md(packet, tnl, &ulen);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    if (!greh) {
 								        return -EINVAL;
 								    }
 								    if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
 								        return -EINVAL;
 								    }
 								    hlen = ulen + gre_header_len(greh->flags);
 								    if (hlen > dp_packet_size(packet)) {
 								        return -EINVAL;
 								    }
 								    options = (ovs_16aligned_be32 *)(greh + 1);
 								    if (greh->flags & htons(GRE_CSUM)) {
 								        ovs_be16 pkt_csum;
 								        pkt_csum = csum(greh, dp_packet_size(packet) -
 								                              ((const unsigned char *)greh -
-												userspace: Add packet_type in dp_packet and flow

This commit adds a packet_type attribute to the structs dp_packet and flow
to explicitly carry the type of the packet as preparation for the
introduction of the so-called packet type-aware pipeline (PTAP) in OVS.

The packet_type is a big-endian 32 bit integer with the encoding as
specified in OpenFlow version 1.5.

The upper 16 bits contain the packet type name space. Pre-defined values
are defined in openflow-common.h:

enum ofp_header_type_namespaces {
    OFPHTN_ONF = 0,             /* ONF namespace. */
    OFPHTN_ETHERTYPE = 1,       /* ns_type is an Ethertype. */
    OFPHTN_IP_PROTO = 2,        /* ns_type is a IP protocol number. */
    OFPHTN_UDP_TCP_PORT = 3,    /* ns_type is a TCP or UDP port. */
    OFPHTN_IPV4_OPTION = 4,     /* ns_type is an IPv4 option number. */
};

The lower 16 bits specify the actual type in the context of the name space.

Only name spaces 0 and 1 will be supported for now.

For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet).
This is the default packet_type in OVS and the only one supported so far.
Packets of type (OFPHTN_ONF, 0) are called Ethernet packets.

In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet.
A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet
with the Ethernet header (and any VLAN tags) removed to expose the L3
(or L2.5) payload of the packet. These will simply be called L3 packets.

The Ethernet address fields dl_src and dl_dst in struct flow are not
applicable for an L3 packet and must be zero. However, to maintain
compatibility with the large code base, we have chosen to copy the
Ethertype of an L3 packet into the dl_type field of struct flow.

This does not mean that it will be possible to match on dl_type for L3
packets with PTAP later on. Matching must be done on packet_type instead.

New dp_packets are initialized with packet_type Ethernet. Ports that
receive L3 packets will have to explicitly adjust the packet_type.

Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-25 16:29:59 +00:00
+								                               (const unsigned char *)dp_packet_eth(packet)));
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        if (pkt_csum) {
 								            return -EINVAL;
 								        }
 								        tnl->flags = FLOW_TNL_F_CSUM;
 								        options++;
 								    }
 								    if (greh->flags & htons(GRE_KEY)) {
-												netdev-native-tnl: Fix treatment of GRE key on big-endian systems.

The GRE implementation used bitwise shifts to convert an ovs_be32 to an
ovs_be64 (with zero extension), but on big-endian systems these conversions
are no-ops.  This fixes the problem.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Gerhard Stenzel <gstenzel@linux.vnet.ibm.com>

											
										
										
											2016-05-26 16:53:52 -07:00
+								        tnl->tun_id = be32_to_be64(get_16aligned_be32(options));
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        tnl->flags |= FLOW_TNL_F_KEY;
 								        options++;
 								    }
 								    if (greh->flags & htons(GRE_SEQ)) {
 								        options++;
 								    }
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								    /* Set the new packet type depending on the GRE protocol field. */
 								    greh_protocol = ntohs(greh->protocol);
 								    if (greh_protocol == ETH_TYPE_TEB) {
 								        packet->packet_type = htonl(PT_ETH);
 								    } else if (greh_protocol >= ETH_TYPE_MIN) {
 								        /* Allow all GRE protocol values above 0x5ff as Ethertypes. */
 								        packet->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, greh_protocol);
 								    } else {
 								        return -EINVAL;
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    return hlen;
 								}
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation
to retain a packet, but STT can keep a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs to pass this number-of-packets parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								struct dp_packet *
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								netdev_gre_pop_header(struct dp_packet *packet)
 								{
-												lib: Add non-null assertions to some return values of `dp_packet_data`.

This commit adds some `ovs_assert()` checks to some return values of
`dp_packet_data()` to ensure that they are not NULL and to prevent
null-pointer dereferences, which might lead to unwanted crashes. We use
assertions since it should be impossible for these calls to
`dp_packet_data()` to return NULL.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: James Raphael Tiovalen <jamestiotio@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-09-03 23:21:54 +08:00
+								    const void *data_dp = dp_packet_data(packet);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    int hlen = sizeof(struct eth_header) + 4;
-												lib: Add non-null assertions to some return values of `dp_packet_data`.

This commit adds some `ovs_assert()` checks to some return values of
`dp_packet_data()` to ensure that they are not NULL and to prevent
null-pointer dereferences, which might lead to unwanted crashes. We use
assertions since it should be impossible for these calls to
`dp_packet_data()` to return NULL.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: James Raphael Tiovalen <jamestiotio@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-09-03 23:21:54 +08:00
+								    ovs_assert(data_dp);
 								    hlen += netdev_tnl_is_header_ipv6(data_dp) ?
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								            IPV6_HEADER_LEN : IP_HEADER_LEN;
 								    pkt_metadata_init_tnl(md);
 								    if (hlen > dp_packet_size(packet)) {
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation
to retain a packet, but STT can keep a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs to pass this number-of-packets parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    hlen = parse_gre_header(packet, tnl);
 								    if (hlen < 0) {
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation
to retain a packet, but STT can keep a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs to pass this number-of-packets parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and leaves the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_pop(packet, hlen);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation
to retain a packet, but STT can keep a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs to pass this number-of-packets parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								    return packet;
 								err:
 								    dp_packet_delete(packet);
 								    return NULL;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								}
 								void
-												userspace: add gre sequence number support.

The patch adds support for GRE sequence numbers.
It is disabled by default.  When enabled with 'options:seq=true',
the outgoing GRE packet will have its sequence number
incremented by one.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:49 -04:00
+								netdev_gre_push_header(const struct netdev *netdev,
-												netdev-native-tnl: refactor the tunnel push header.

The patch adds additional 'struct netdev *' to the
native tunnel's push_header() interface.  This is used
for later GRE sequence number support.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-03-09 13:02:23 -08:00
+								                       struct dp_packet *packet,
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								                       const struct ovs_action_push_tnl *data)
 								{
-												userspace: add gre sequence number support.

The patch adds support for GRE sequence numbers.
It is disabled by default.  When enabled with 'options:seq=true',
the outgoing GRE packet will have its sequence number
incremented by one.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:49 -04:00
+								    struct netdev_vport *dev = netdev_vport_cast(netdev);
-												userspace: Support GRE TSO.

This patch extends the userspace datapaths support of tunnel tso from
only supporting VxLAN and Geneve to also supporting GRE tunnels. There
is also a software fallback for cases where the egress netdev does not
support this feature.

Reviewed-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-01-16 00:21:31 -05:00
+								    uint16_t l3_ofs = packet->l3_ofs;
 								    uint16_t l4_ofs = packet->l4_ofs;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    struct gre_base_hdr *greh;
 								    int ip_tot_size;
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and leaves the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_push(packet, data);
-												userspace: Support GRE TSO.

This patch extends the userspace datapaths support of tunnel tso from
only supporting VxLAN and Geneve to also supporting GRE tunnels. There
is also a software fallback for cases where the egress netdev does not
support this feature.

Reviewed-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-01-16 00:21:31 -05:00
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								    greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
 								                                     &ip_tot_size, 0);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
 								    if (greh->flags & htons(GRE_CSUM)) {
 								        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
 								        *csum_opt = csum(greh, ip_tot_size);
 								    }
-												userspace: add gre sequence number support.

The patch adds support for GRE sequence numbers.
It is disabled by default.  When enabled with 'options:seq=true',
the outgoing GRE packet will have its sequence number
incremented by one.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:49 -04:00
 								    if (greh->flags & htons(GRE_SEQ)) {
-												dp-packet: Rework TCP segmentation.

Rather than mark with an offload flag + mark with a segmentation size,
simply rely on the netdev implementation which sets a segmentation size
when appropriate.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:21:00 +02:00
+								        if (!dp_packet_get_tso_segsz(packet)) {
-												userspace: Support GRE TSO.

This patch extends the userspace datapaths support of tunnel tso from
only supporting VxLAN and Geneve to also supporting GRE tunnels. There
is also a software fallback for cases where the egress netdev does not
support this feature.

Reviewed-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-01-16 00:21:31 -05:00
+								            /* Last 4 bytes are GRE seqno. */
 								            int seq_ofs = gre_header_len(greh->flags) - 4;
 								            ovs_16aligned_be32 *seq_opt =
 								                ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs);
 								            put_16aligned_be32(seq_opt,
 								                               htonl(atomic_count_inc(&dev->gre_seqno)));
 								        } else {
 								            VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO.");
 								        }
 								    }
 								    if (l3_ofs != UINT16_MAX) {
 								        packet->inner_l3_ofs = l3_ofs + data->header_len;
 								    }
 								    if (l4_ofs != UINT16_MAX) {
 								        packet->inner_l4_ofs = l4_ofs + data->header_len;
-												userspace: add gre sequence number support.

The patch adds support for GRE sequence numbers.
It is disabled by default.  When enabled with 'options:seq=true',
the outgoing GRE packet will have its sequence number
incremented by one.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:49 -04:00
+								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								}
 								int
 								netdev_gre_build_header(const struct netdev *netdev,
 								                        struct ovs_action_push_tnl *data,
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, which makes the code pretty hard to follow.
The following patch refactors the code to move all of it to the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								                        const struct netdev_tnl_build_header_params *params)
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								{
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    const struct netdev_tunnel_config *tnl_cfg;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    struct gre_base_hdr *greh;
 								    ovs_16aligned_be32 *options;
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, which makes the code pretty hard to follow.
The following patch refactors the code to move all of it to the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    unsigned int hlen;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_ip_build_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:22 +09:00
+								    greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE, 0);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												userspace: Handling of versatile tunnel ports

In netdev_gre_build_header(), GRE protocol and VXLAN next_protocol are set based
on the packet_type of the flow. If it is an Ethernet packet, it is set to
ETH_TYPE_TEB. Otherwise, if the name space is OFPHTN_ETHERTYPE, it is set
according to the name space type.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:59 +00:00
+								    if (params->flow->packet_type == htonl(PT_ETH)) {
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								        greh->protocol = htons(ETH_TYPE_TEB);
-												userspace: Handling of versatile tunnel ports

In netdev_gre_build_header(), GRE protocol and VXLAN next_protocol are set based
on the packet_type of the flow. If it is an Ethernet packet, it is set to
ETH_TYPE_TEB. Otherwise, if the name space is OFPHTN_ETHERTYPE, it is set
according to the name space type.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:59 +00:00
+								    } else if (pt_ns(params->flow->packet_type) == OFPHTN_ETHERTYPE) {
 								        greh->protocol = pt_ns_type_be(params->flow->packet_type);
 								    } else {
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								        return EINVAL;
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    greh->flags = 0;
 								    options = (ovs_16aligned_be32 *) (greh + 1);
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, which makes the code pretty hard to follow.
The following patch refactors the code to move all of it to the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        greh->flags |= htons(GRE_CSUM);
 								        put_16aligned_be32(options, 0);
 								        options++;
 								    }
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    tnl_cfg = netdev_get_tunnel_config(netdev);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    if (tnl_cfg->out_key_present) {
 								        greh->flags |= htons(GRE_KEY);
-												netdev-native-tnl: Fix treatment of GRE key on big-endian systems.

The GRE implementation used bitwise shifts to convert an ovs_be32 to an
ovs_be64 (with zero extension), but on big-endian systems these conversions
are no-ops.  This fixes the problem.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Gerhard Stenzel <gstenzel@linux.vnet.ibm.com>

											
										
										
											2016-05-26 16:53:52 -07:00
+								        put_16aligned_be32(options, be64_to_be32(params->flow->tunnel.tun_id));
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								        options++;
 								    }
-												userspace: add gre sequence number support.

The patch adds support for gre sequence number.
Disabled by default.  When enabled with 'options:seq=true',
the outgoing gre packet will have its sequence number
incremented by one.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:49 -04:00
+								    if (tnl_cfg->set_seq) {
 								        greh->flags |= htons(GRE_SEQ);
 								        /* seqno is updated at push header */
 								        options++;
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    hlen = (uint8_t *) options - (uint8_t *) greh;
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, which makes the code pretty hard to follow.
The following patch refactors the code to move all of it to the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    data->header_len += hlen;
-												ip6gre: Add ip6gre vport type

Add handlers for OVS_VPORT_TYPE_IP6GRE

Cc: Ben Pfaff <blp@ovn.org>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2018-05-04 10:14:44 -07:00
+								    if (!params->is_ipv6) {
 								        data->tnl_type = OVS_VPORT_TYPE_GRE;
 								    } else {
 								        data->tnl_type = OVS_VPORT_TYPE_IP6GRE;
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    return 0;
 								}
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								struct dp_packet *
 								netdev_erspan_pop_header(struct dp_packet *packet)
 								{
 								    const struct gre_base_hdr *greh;
 								    const struct erspan_base_hdr *ersh;
 								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    int hlen = sizeof(struct eth_header);
 								    unsigned int ulen;
 								    uint16_t greh_protocol;
 								    hlen += netdev_tnl_is_header_ipv6(dp_packet_data(packet)) ?
 								            IPV6_HEADER_LEN : IP_HEADER_LEN;
 								    pkt_metadata_init_tnl(md);
 								    if (hlen > dp_packet_size(packet)) {
 								        goto err;
 								    }
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust the decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								    greh = ip_extract_tnl_md(packet, tnl, &ulen);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    if (!greh) {
 								        goto err;
 								    }
 								    greh_protocol = ntohs(greh->protocol);
 								    if (greh_protocol != ETH_TYPE_ERSPAN1 &&
 								        greh_protocol != ETH_TYPE_ERSPAN2) {
 								        goto err;
 								    }
 								    if (greh->flags & ~htons(GRE_SEQ)) {
 								        goto err;
 								    }
 								    ersh = ERSPAN_HDR(greh);
-												ip6gre: Add ip6gre vport type

Add handlers for OVS_VPORT_TYPE_IP6GRE

Cc: Ben Pfaff <blp@ovn.org>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2018-05-04 10:14:44 -07:00
+								    tnl->tun_id = be16_to_be64(htons(get_sid(ersh)));
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    tnl->erspan_ver = ersh->ver;
 								    if (ersh->ver == 1) {
 								        ovs_16aligned_be32 *index = ALIGNED_CAST(ovs_16aligned_be32 *,
 								                                                 ersh + 1);
 								        tnl->erspan_idx = ntohl(get_16aligned_be32(index));
 								        tnl->flags |= FLOW_TNL_F_KEY;
 								        hlen = ulen + ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V1_MDSIZE;
 								    } else if (ersh->ver == 2) {
 								        struct erspan_md2 *md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
 								        tnl->erspan_dir = md2->dir;
 								        tnl->erspan_hwid = get_hwid(md2);
 								        tnl->flags |= FLOW_TNL_F_KEY;
 								        hlen = ulen + ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE;
 								    } else {
 								        VLOG_WARN_RL(&err_rl, "ERSPAN version error %d", ersh->ver);
 								        goto err;
 								    }
 								    if (hlen > dp_packet_size(packet)) {
 								        goto err;
 								    }
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 statuses when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and leaves the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_pop(packet, hlen);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
 								    return packet;
 								err:
 								    dp_packet_delete(packet);
 								    return NULL;
 								}
 								void
 								netdev_erspan_push_header(const struct netdev *netdev,
 								                          struct dp_packet *packet,
 								                          const struct ovs_action_push_tnl *data)
 								{
 								    struct netdev_vport *dev = netdev_vport_cast(netdev);
 								    struct erspan_base_hdr *ersh;
 								    struct gre_base_hdr *greh;
 								    struct erspan_md2 *md2;
 								    int ip_tot_size;
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								    greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
 								                                     &ip_tot_size, 0);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
 								    /* update GRE seqno */
 								    ovs_16aligned_be32 *seqno = (ovs_16aligned_be32 *) (greh + 1);
-												netdev-vport: Fix unsafe handling of GRE sequence number.

GRE sequence number is maintained as part of the tunnel config.
This triggers tunnel reconfiguration every time set_tunnel_config()
is called, because memset over tunnel config will never be equal to
the new config constructed from database options.

And the sequence number is incremented non-atomically without holding a
mutex on tunnel push, which may lead to corruption if multiple
threads are sending packets to the same tunnel.

Fix that by moving sequence number to the netdev_vport structure
instead and using an atomic counter.

Fixes: 0ffff4975308 ("userspace: add gre sequence number support.")
Fixes: 7dc18ae96d33 ("userspace: add erspan tunnel support.")
Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.")
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-19 22:05:37 +02:00
+								    put_16aligned_be32(seqno, htonl(atomic_count_inc(&dev->gre_seqno)));
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
 								    /* update v2 timestamp */
 								    if (greh->protocol == htons(ETH_TYPE_ERSPAN2)) {
 								        ersh = ERSPAN_HDR(greh);
 								        md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
 								        put_16aligned_be32(&md2->timestamp, get_erspan_ts(ERSPAN_100US));
 								    }
 								}
 								int
 								netdev_erspan_build_header(const struct netdev *netdev,
-												trivial: Fix erspan coding style.

Fix indentation and whitespace.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-12-03 13:37:56 -08:00
+								                           struct ovs_action_push_tnl *data,
 								                           const struct netdev_tnl_build_header_params *params)
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								{
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    const struct netdev_tunnel_config *tnl_cfg;
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    struct gre_base_hdr *greh;
 								    struct erspan_base_hdr *ersh;
 								    unsigned int hlen;
 								    uint32_t tun_id;
-												erspan: Add flow-based erspan options

The patch adds support for flow-based erspan options.
The erspan_ver, erspan_idx, erspan_dir, and erspan_hwid can be
set as "flow" so that its value is set by the openflow rule,
instead of statically configured at port creation time.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-17 17:46:41 -07:00
+								    int erspan_ver;
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    uint16_t sid;
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_ip_build_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:22 +09:00
+								    greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE, 0);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    ersh = ERSPAN_HDR(greh);
 								    tun_id = ntohl(be64_to_be32(params->flow->tunnel.tun_id));
 								    /* ERSPAN only has 10-bit session ID */
 								    if (tun_id & ~ERSPAN_SID_MASK) {
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								        return EINVAL;
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    } else {
 								        sid = (uint16_t) tun_id;
 								    }
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    tnl_cfg = netdev_get_tunnel_config(netdev);
-												erspan: Add flow-based erspan options

The patch adds support for flow-based erspan options.
The erspan_ver, erspan_idx, erspan_dir, and erspan_hwid can be
set as "flow" so that its value is set by the openflow rule,
instead of statically configured at port creation time.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-17 17:46:41 -07:00
+								    if (tnl_cfg->erspan_ver_flow) {
 								        erspan_ver = params->flow->tunnel.erspan_ver;
 								    } else {
 								        erspan_ver = tnl_cfg->erspan_ver;
 								    }
 								    if (erspan_ver == 1) {
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								        greh->protocol = htons(ETH_TYPE_ERSPAN1);
 								        greh->flags = htons(GRE_SEQ);
 								        ersh->ver = 1;
 								        set_sid(ersh, sid);
-												netdev-native-tnl: Fix alignment for erspan index.

Flagged by clang.

CC: William Tu <u9012063@gmail.com>
Fixes: 068794b43f0e ("erspan: Add flow-based erspan options")
Signed-off-by: Darrell Ball <dlu998@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2018-05-23 19:13:56 -07:00
+								        uint32_t erspan_idx = (tnl_cfg->erspan_idx_flow
 								                          ? params->flow->tunnel.erspan_idx
 								                          : tnl_cfg->erspan_idx);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								        put_16aligned_be32(ALIGNED_CAST(ovs_16aligned_be32 *, ersh + 1),
-												netdev-native-tnl: Fix alignment for erspan index.

Flagged by clang.

CC: William Tu <u9012063@gmail.com>
Fixes: 068794b43f0e ("erspan: Add flow-based erspan options")
Signed-off-by: Darrell Ball <dlu998@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2018-05-23 19:13:56 -07:00
+								                           htonl(erspan_idx));
-												erspan: Add flow-based erspan options

The patch adds support for flow-based erspan options.
The erspan_ver, erspan_idx, erspan_dir, and erspan_hwid can be
set as "flow" so that its value is set by the openflow rule,
instead of statically configured at port creation time.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-17 17:46:41 -07:00
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								        hlen = ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V1_MDSIZE;
-												erspan: Add flow-based erspan options

The patch adds support for flow-based erspan options.
The erspan_ver, erspan_idx, erspan_dir, and erspan_hwid can be
set as "flow" so that its value is set by the openflow rule,
instead of statically configured at port creation time.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-17 17:46:41 -07:00
+								    } else if (erspan_ver == 2) {
 								        struct erspan_md2 *md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  The patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								        greh->protocol = htons(ETH_TYPE_ERSPAN2);
 								        greh->flags = htons(GRE_SEQ);
 								        ersh->ver = 2;
 								        set_sid(ersh, sid);
 								        md2->sgt = 0; /* security group tag */
 								        md2->gra = 0;
 								        put_16aligned_be32(&md2->timestamp, 0);
-												erspan: Add flow-based erspan options

The patch adds support for flow-based erspan options.
The erspan_ver, erspan_idx, erspan_dir, and erspan_hwid can be
set as "flow" so that its value is set by the openflow rule,
instead of statically configured at port creation time.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-17 17:46:41 -07:00
 								        if (tnl_cfg->erspan_hwid_flow) {
 								            set_hwid(md2, params->flow->tunnel.erspan_hwid);
 								        } else {
 								            set_hwid(md2, tnl_cfg->erspan_hwid);
 								        }
 								        if (tnl_cfg->erspan_dir_flow) {
 								            md2->dir = params->flow->tunnel.erspan_dir;
 								        } else {
 								            md2->dir = tnl_cfg->erspan_dir;
 								        }
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  This patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
 								        hlen = ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE;
 								    } else {
 								        VLOG_WARN_RL(&err_rl, "ERSPAN version error %d", tnl_cfg->erspan_ver);
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								        return EINVAL;
-												userspace: add erspan tunnel support.

ERSPAN is a tunneling protocol based on GRE tunnel.  This patch
adds erspan tunnel support for ovs-vswitchd with userspace datapath.
Configuring erspan tunnel is similar to gre tunnel, but with
additional erspan's parameters.  Matching a flow on erspan's
metadata is also supported, see ovs-fields for more details.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-15 16:10:48 -04:00
+								    }
 								    data->header_len += hlen;
 								    if (params->is_ipv6) {
 								        data->tnl_type = OVS_VPORT_TYPE_IP6ERSPAN;
 								    } else {
 								        data->tnl_type = OVS_VPORT_TYPE_ERSPAN;
 								    }
 								    return 0;
 								}
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								struct dp_packet *
 								netdev_gtpu_pop_header(struct dp_packet *packet)
 								{
 								    /* Decapsulates a GTP-U packet.  Records the GTP-U flags, message type
 								     * and TEID in 'packet->md.tunnel' and, for G-PDU messages, pops the
 								     * tunnel headers.  Returns 'packet' on success.  On a parse failure
 								     * the packet is deleted and NULL is returned. */
 								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    struct gtpuhdr *gtph;
 								    unsigned int gtpu_hlen;
 								    unsigned int hlen;
 								    ovs_assert(packet->l3_ofs > 0);
 								    ovs_assert(packet->l4_ofs > 0);
 								    pkt_metadata_init_tnl(md);
 								    /* Give up if the L4 size is smaller than GTPU_HLEN, i.e. the UDP
 								     * header plus the base GTP-U header. */
 								    if (GTPU_HLEN > dp_packet_l4_size(packet)) {
 								        goto err;
 								    }
 								    /* A NULL result is treated as a parse failure.  'hlen' is presumably
 								     * filled in by the helper (it is used below) -- the helper is not
 								     * visible here, TODO confirm. */
 								    gtph = udp_extract_tnl_md(packet, tnl, &hlen);
 								    if (!gtph) {
 								        goto err;
 								    }
 								    /* Expose the GTP-U flags, message type and TEID (as the tunnel ID)
 								     * through the tunnel metadata. */
 								    tnl->gtpu_flags = gtph->md.flags;
 								    tnl->gtpu_msgtype = gtph->md.msgtype;
 								    tnl->tun_id = be32_to_be64(get_16aligned_be32(&gtph->teid));
 								    if (tnl->gtpu_msgtype == GTPU_MSGTYPE_GPDU) {
 								        struct ip_header *ip;
 								        /* When the GTPU_S_MASK flag is set, the header length grows by
 								         * sizeof(struct gtpuhdr_opt). */
 								        if (gtph->md.flags & GTPU_S_MASK) {
 								            gtpu_hlen = GTPU_HLEN + sizeof(struct gtpuhdr_opt);
 								        } else {
 								            gtpu_hlen = GTPU_HLEN;
 								        }
 								        /* Peek at the IP version to select the packet type.
 								         * NOTE(review): GTPU_HLEN already includes
 								         * sizeof(struct udp_header); confirm that 'gtph' is the right base
 								         * for this offset and that the packet is known to be long enough
 								         * for this read -- neither is established by the code visible
 								         * here. */
 								        ip = ALIGNED_CAST(struct ip_header *, (char *)gtph + gtpu_hlen);
 								        if (IP_VER(ip->ip_ihl_ver) == 4) {
 								            packet->packet_type = htonl(PT_IPV4);
 								        } else if (IP_VER(ip->ip_ihl_ver) == 6) {
 								            packet->packet_type = htonl(PT_IPV6);
 								        } else {
 								            /* 'packet_type' is left unchanged in this case, but the
 								             * headers are still popped below. */
 								            VLOG_WARN_RL(&err_rl, "GTP-U: Receive non-IP packet.");
 								        }
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated by kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								        tnl_ol_pop(packet, hlen + gtpu_hlen);
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    } else {
 								        /* Non-G-PDU GTP-U messages, e.g. echo request, end marker.
 								         * Users should redirect these packets to a controller, or
 								         * any application that handles GTP-U messages, so keep
 								         * the original packet (no header is popped in this branch).
 								         */
 								        packet->packet_type = htonl(PT_ETH);
 								        VLOG_WARN_ONCE("Receive non-GPDU msgtype: %"PRIu8,
 								                       gtph->md.msgtype);
 								    }
 								    return packet;
 								err:
 								    /* Error path: the packet is deleted here, so callers must not use it
 								     * after a NULL return. */
 								    dp_packet_delete(packet);
 								    return NULL;
 								}
 								void
 								netdev_gtpu_push_header(const struct netdev *netdev,
 								                        struct dp_packet *packet,
 								                        const struct ovs_action_push_tnl *data)
 								{
 								    struct netdev_vport *dev = netdev_vport_cast(netdev);
 								    struct udp_header *udp;
 								    struct gtpuhdr *gtpuh;
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    ovs_be16 udp_src;
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    int ip_tot_size;
 								    unsigned int payload_len;
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    /* We may need to re-calculate the hash and this has to be done before
 								     * modifying the packet. */
 								    udp_src = netdev_tnl_get_src_port(packet);
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    payload_len = dp_packet_size(packet);
-												netdev-native-tnl: Add ipv6_label param in netdev_tnl_push_ip_header.

For tunnels such as SRv6, some popular vendor appliances support
IPv6 flowlabel based load balancing. In preparation for OVS to
support it, this patch modifies the encapsulation to allow IPv6
flowlabel to be configured.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:21 +09:00
+								    udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
 								                                    &ip_tot_size, 0);
-												netdev-native-tnl: Fix use of uninitialized RSS hash.

RSS hash calculation for a packet may be skipped in some cases.  One
of them is a simple match optimization.  Packet is not fully parsed
for the simple match, so there is not enough data to calculate the full
5-tuple hash.  However, when such a packet needs tunnel encapsulation,
we need RSS hash to calculate the source port for the outer UDP header.
And netdev_tnl_get_src_port() function doesn't check if the hash is
valid before using it.  So, such packets will likely end up with
different and unpredictable source ports potentially causing packet
reordering or other issues in the network:

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x10c129c in dp_packet_get_rss_hash lib/dp-packet.h:1029:5
  1 0x10b264c in netdev_tnl_get_src_port lib/netdev-native-tnl.h:131:12
  2 0x10b171a in netdev_tnl_push_udp_header lib/netdev-native-tnl.c:286:20
  3 0xb772fe in netdev_push_header lib/netdev.c:1037:13
  4 0x9673c4 in push_tnl_action lib/dpif-netdev.c:9067:11
  5 0x961abe in dp_execute_cb lib/dpif-netdev.c:9226:13
  6 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
  7 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
  8 0x968f3f in dp_execute_userspace_action lib/dpif-netdev.c:9093:9
  9 0x962e54 in dp_execute_cb lib/dpif-netdev.c:9307:17
 10 0xbcb4b1 in odp_execute_actions lib/odp-execute.c:1008:17
 11 0x8e939f in dp_netdev_execute_actions lib/dpif-netdev.c:9524:5
 12 0x950fef in packet_batch_per_flow_execute lib/dpif-netdev.c:8271:5
 13 0x8ec8db in dp_netdev_input__ lib/dpif-netdev.c:8899:9
 14 0x8eb8ec in dp_netdev_input lib/dpif-netdev.c:8908:5
 15 0x92d5e8 in dp_netdev_process_rxq_port lib/dpif-netdev.c:5660:19
 16 0x8ee2c4 in dpif_netdev_run lib/dpif-netdev.c:6993:25
 17 0x9b442f in dpif_run lib/dpif.c:471:16
 18 0x5f8e3a in type_run ofproto/ofproto-dpif.c:367:9
 19 0x56c508 in ofproto_type_run ofproto/ofproto.c:1879:31
 20 0x4cb388 in bridge_run__ vswitchd/bridge.c:3281:9
 21 0x4c9b00 in bridge_run vswitchd/bridge.c:3346:5
 22 0x526043 in main vswitchd/ovs-vswitchd.c:130:9
 23 0x7f1192 in __libc_start_call_main
 24 0x7f1192 in __libc_start_main@GLIBC_2.2.5
 25 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

The issue is caught by running the 'debug_slow' test under the memory
sanitizer.  Another way to reproduce is by sending two packets at once
through the datapath.  The first one will get the same memory chunk as
the upcalled packet with already calculated RSS, the second one will
get the brand new memory chunk without the calculated RSS, so these
two packets will have different source ports after encapsulation.
The test is updated to cover this case.

Fix the issue by checking if the hash is valid before using, re-parsing
and calculating if it is not.  The netdev_tnl_get_src_port() function
moved to the .c file, since there is no real reason for it to be in the
header.  Compiler can decide on inlining it.  The declaration kept in
the header, since all the other functions declared there, even if there
is no reason for that.

In the future we may want to consolidate all the places where we
re-calculate RSS hash into a single function, but it's a little tricky.
This is also a larger change that would be harder to backport.  So, not
touching that aspect for now.

Re-parsing the packet eliminates advantages of the simple match, but
it was designed primarily for very simple setups that do not involve
tunneling or any other complex processing, so it should not be a big
problem.  And simple match can still be used with tunneling when the
input port provides the RSS hash.

Also, checking if the hash is valid is a right thing to do anyways.

Next step might be to not use simple match when there is no RSS hash
and there is a tunnel push action, but it seems hard to implement,
especially since we don't know the actions until we lookup the flow.

Fixes: e7e9973b80d3 ("dpif-netdev: Forwarding optimization for flows with a simple match.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-29 17:36:45 +01:00
+								    udp->udp_src = udp_src;
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    udp->udp_len = htons(ip_tot_size);
-												userspace: Enable L4 checksum offloading by default.

The netdev receiving packets is supposed to provide the flags
indicating if the L4 checksum was verified and it is OK or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulating a packet with that flag, set the checksum
of the inner L4 header since that is not yet supported.

Calculate the L4 checksum when the packet is going to be sent
over a device that doesn't support the feature.

Linux tap devices allow enabling L3 and L4 offload, so this
patch enables the feature. However, Linux socket interface
remains disabled because the API doesn't allow enabling
those two features without enabling TSO too.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-14 15:03:27 -04:00
+								    /* Postpone checksum to the egress netdev. */
-												dp-packet: Rework L4 checksum offloads.

The DPDK mbuf API specifies 4 status when it comes to L4 checksums:
- RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
- RTE_MBUF_F_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
- RTE_MBUF_F_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
- RTE_MBUF_F_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
  data, but the integrity of the L4 data is verified.

Similarly to the IP checksum offloads API, revise OVS L4 offloads API.

No information about the L4 protocol is provided by any netdev-*
implementation, so OVS needs to mark this L4 protocol during flow
extraction.

Rename current API for consistency with dp_packet_(inner_)?l4_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:58 +02:00
+								    dp_packet_l4_proto_set_udp(packet);
 								    dp_packet_l4_checksum_set_partial(packet);
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
 								    gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
-												netdev-vport: Fix unsafe handling of GRE sequence number.

GRE sequence number is maintained as part of the tunnel config.
This triggers tunnel reconfiguration every time set_tunnel_config()
is called, because memset over tunnel config will never be equal to
the new config constructed from database options.

And sequence number incremented non-atomically without holding a
mutex on tunnel push, that may lead to corruption if multiple
threads are sending packets to the same tunnel.

Fix that by moving sequence number to the netdev_vport structure
instead and using an atomic counter.

Fixes: 0ffff4975308 ("userspace: add gre sequence number support.")
Fixes: 7dc18ae96d33 ("userspace: add erspan tunnel support.")
Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.")
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-19 22:05:37 +02:00
+								    if (gtpuh->md.flags & GTPU_S_MASK) {
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								        ovs_be16 *seqno = ALIGNED_CAST(ovs_be16 *, gtpuh + 1);
-												netdev-vport: Fix unsafe handling of GRE sequence number.

GRE sequence number is maintained as part of the tunnel config.
This triggers tunnel reconfiguration every time set_tunnel_config()
is called, because memset over tunnel config will never be equal to
the new config constructed from database options.

The sequence number is also incremented non-atomically without holding
a mutex on tunnel push, which may lead to corruption if multiple
threads are sending packets to the same tunnel.

Fix that by moving sequence number to the netdev_vport structure
instead and using an atomic counter.

Fixes: 0ffff4975308 ("userspace: add gre sequence number support.")
Fixes: 7dc18ae96d33 ("userspace: add erspan tunnel support.")
Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.")
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-19 22:05:37 +02:00
+								        *seqno = htons(atomic_count_inc(&dev->gre_seqno));
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. GTP is usually used to connect the radio base station,
the Serving Gateway (S-GW), and the PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								        payload_len += sizeof(struct gtpuhdr_opt);
 								    }
 								    gtpuh->len = htons(payload_len);
 								}
 								int
 								netdev_gtpu_build_header(const struct netdev *netdev,
 								                         struct ovs_action_push_tnl *data,
 								                         const struct netdev_tnl_build_header_params *params)
 								{
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    const struct netdev_tunnel_config *tnl_cfg;
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. GTP is usually used to connect the radio base station,
the Serving Gateway (S-GW), and the PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    struct gtpuhdr *gtph;
 								    unsigned int gtpu_hlen;
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    tnl_cfg = netdev_get_tunnel_config(netdev);
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. GTP is usually used to connect the radio base station,
the Serving Gateway (S-GW), and the PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    gtph = udp_build_header(tnl_cfg, data, params);
 								    /* Set to default if not set in flow. */
 								    gtph->md.flags = params->flow->tunnel.gtpu_flags ?
 								                     params->flow->tunnel.gtpu_flags : GTPU_FLAGS_DEFAULT;
 								    gtph->md.msgtype = params->flow->tunnel.gtpu_msgtype ?
 								                       params->flow->tunnel.gtpu_msgtype : GTPU_MSGTYPE_GPDU;
 								    put_16aligned_be32(&gtph->teid,
 								                       be64_to_be32(params->flow->tunnel.tun_id));
 								    gtpu_hlen = sizeof *gtph;
 								    if (tnl_cfg->set_seq) {
 								        gtph->md.flags |= GTPU_S_MASK;
 								        gtpu_hlen += sizeof(struct gtpuhdr_opt);
 								    }
 								    data->header_len += gtpu_hlen;
 								    data->tnl_type = OVS_VPORT_TYPE_GTPU;
 								    return 0;
 								}
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								int
 								netdev_srv6_build_header(const struct netdev *netdev,
 								                         struct ovs_action_push_tnl *data,
 								                         const struct netdev_tnl_build_header_params *params)
 								{
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    const struct netdev_tunnel_config *tnl_cfg;
-												srv6: Fix misaligned writes to segment list.

The segment list in the SRv6 header is 16-bit aligned, like most other
fields in packet headers.  A little counter-intuitively, compilers are
allowed to make alignment assumptions based on the pointer type passed to
memcpy(), so they can use copy instructions that require 32-bit alignment
when given a struct in6_addr pointer.  Reported by UBsan in Clang 18:

 lib/netdev-native-tnl.c:985:16: runtime error: store to misaligned
       address 0x7fd9e97351ce for type 'struct in6_addr *', which
       requires 4 byte alignment
 0x7fd9e97351ce: note: pointer points here
 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
             ^
   0 0xc1de38 in netdev_srv6_build_header lib/netdev-native-tnl.c:985:9
   1 0x6e794b in tnl_port_build_header ofproto/tunnel.c:751:11
   2 0x6c9c0a in native_tunnel_output ofproto/ofproto-dpif-xlate.c:3887:11
   3 0x6c9c0a in compose_output_action__ ofproto/ofproto-dpif-xlate.c:4502:13
   4 0x6b6646 in compose_output_action ofproto/ofproto-dpif-xlate.c:4564:5
   5 0x6b6646 in xlate_output_action ofproto/ofproto-dpif-xlate.c:5517:13
   6 0x68cfee in do_xlate_actions ofproto/ofproto-dpif-xlate.c:7288:13
   7 0x67fed0 in xlate_actions ofproto/ofproto-dpif-xlate.c:8314:13
   8 0x6468bd in ofproto_trace__ ofproto/ofproto-dpif-trace.c:782:30
   9 0x64484a in ofproto_trace ofproto/ofproto-dpif-trace.c:851:5
  10 0x647469 in ofproto_unixctl_trace ofproto/ofproto-dpif-trace.c:490:9
  11 0xc33771 in process_command lib/unixctl.c:310:13
  12 0xc33771 in run_connection lib/unixctl.c:344:17
  13 0xc33771 in unixctl_server_run lib/unixctl.c:395:21
  14 0x53e6ef in main vswitchd/ovs-vswitchd.c:131:9
  15 0x7f61c7 in __libc_start_call_main (/lib64/libc.so.6+0x2a1c7)
  16 0x7f628a in __libc_start_main@GLIBC_2.2.5 (/lib64/libc.so.6+0x2a28a)
  17 0x42ca24 in _start (vswitchd/ovs-vswitchd+0x42ca24)

 SUMMARY: UndefinedBehaviorSanitizer:
          undefined-behavior lib/netdev-native-tnl.c:985:16

Having misaligned pointers is also generally not allowed in C, let
alone accessing memory through them.

Fix that by using an appropriate ovs_16aligned_in6_addr pointer instead.

Fixes: 7381fd440a88 ("odp: Add SRv6 tunnel actions.")
Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Reviewed-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-05-17 20:33:03 +02:00
+								    union ovs_16aligned_in6_addr *s;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    const struct in6_addr *segs;
 								    struct srv6_base_hdr *srh;
 								    ovs_be16 dl_type;
 								    int nr_segs;
 								    int i;
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    tnl_cfg = netdev_get_tunnel_config(netdev);
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    if (tnl_cfg->srv6_num_segs) {
 								        nr_segs = tnl_cfg->srv6_num_segs;
 								        segs = tnl_cfg->srv6_segs;
 								    } else {
 								        /*
 								         * If explicit segment list setting is omitted, tunnel destination
 								         * is considered to be the first segment list.
 								         */
 								        nr_segs = 1;
 								        segs = &params->flow->tunnel.ipv6_dst;
 								    }
 								    if (!ipv6_addr_equals(&segs[0], &params->flow->tunnel.ipv6_dst)) {
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								        return EINVAL;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    }
-												userspace: Add new option srv6_flowlabel in SRv6 tunnel.

It supports flowlabel based load balancing by controlling the flowlabel
of outer IPv6 header, which is already implemented in Linux kernel as
seg6_flowlabel sysctl [1].

[1]: https://docs.kernel.org/networking/seg6-sysctl.html

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:23 +09:00
+								    /* Writes the netdev_srv6_flowlabel enum value to the ipv6
 								     * flowlabel field. It must later be replaced by a valid value
 								     * in the header push. */
 								    srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING,
 								                                     htonl(tnl_cfg->srv6_flowlabel));
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    srh->rt_hdr.segments_left = nr_segs - 1;
 								    srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
 								    srh->rt_hdr.hdrlen = 2 * nr_segs;
 								    srh->last_entry = nr_segs - 1;
 								    srh->flags = 0;
 								    srh->tag = 0;
 								    dl_type = params->flow->dl_type;
 								    if (dl_type == htons(ETH_TYPE_IP)) {
 								        srh->rt_hdr.nexthdr = IPPROTO_IPIP;
 								    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
 								        srh->rt_hdr.nexthdr = IPPROTO_IPV6;
 								    } else {
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								        return EOPNOTSUPP;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    }
-												srv6: Fix misaligned writes to segment list.

The segment list in the SRv6 header is 16-bit aligned, like most other
fields in packet headers.  A little counter-intuitively, compilers are
allowed to make alignment assumptions based on the pointer type passed to
memcpy(), so they can use copy instructions that require 32-bit alignment
when given a struct in6_addr pointer.  Reported by UBsan in Clang 18:

 lib/netdev-native-tnl.c:985:16: runtime error: store to misaligned
       address 0x7fd9e97351ce for type 'struct in6_addr *', which
       requires 4 byte alignment
 0x7fd9e97351ce: note: pointer points here
 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
             ^
   0 0xc1de38 in netdev_srv6_build_header lib/netdev-native-tnl.c:985:9
   1 0x6e794b in tnl_port_build_header ofproto/tunnel.c:751:11
   2 0x6c9c0a in native_tunnel_output ofproto/ofproto-dpif-xlate.c:3887:11
   3 0x6c9c0a in compose_output_action__ ofproto/ofproto-dpif-xlate.c:4502:13
   4 0x6b6646 in compose_output_action ofproto/ofproto-dpif-xlate.c:4564:5
   5 0x6b6646 in xlate_output_action ofproto/ofproto-dpif-xlate.c:5517:13
   6 0x68cfee in do_xlate_actions ofproto/ofproto-dpif-xlate.c:7288:13
   7 0x67fed0 in xlate_actions ofproto/ofproto-dpif-xlate.c:8314:13
   8 0x6468bd in ofproto_trace__ ofproto/ofproto-dpif-trace.c:782:30
   9 0x64484a in ofproto_trace ofproto/ofproto-dpif-trace.c:851:5
  10 0x647469 in ofproto_unixctl_trace ofproto/ofproto-dpif-trace.c:490:9
  11 0xc33771 in process_command lib/unixctl.c:310:13
  12 0xc33771 in run_connection lib/unixctl.c:344:17
  13 0xc33771 in unixctl_server_run lib/unixctl.c:395:21
  14 0x53e6ef in main vswitchd/ovs-vswitchd.c:131:9
  15 0x7f61c7 in __libc_start_call_main (/lib64/libc.so.6+0x2a1c7)
  16 0x7f628a in __libc_start_main@GLIBC_2.2.5 (/lib64/libc.so.6+0x2a28a)
  17 0x42ca24 in _start (vswitchd/ovs-vswitchd+0x42ca24)

 SUMMARY: UndefinedBehaviorSanitizer:
          undefined-behavior lib/netdev-native-tnl.c:985:16

Having misaligned pointers is also generally not allowed in C, let
alone accessing memory through them.

Fix that by using an appropriate ovs_16aligned_in6_addr pointer instead.

Fixes: 7381fd440a88 ("odp: Add SRv6 tunnel actions.")
Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Reviewed-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-05-17 20:33:03 +02:00
+								    s = (union ovs_16aligned_in6_addr *) (srh + 1);
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    for (i = 0; i < nr_segs; i++) {
 								        /* Segment list is written to the header in reverse order. */
 								        memcpy(s, &segs[nr_segs - i - 1], sizeof *s);
 								        s++;
 								    }
 								    data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
 								    data->tnl_type = OVS_VPORT_TYPE_SRV6;
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    return 0;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								}
 								void
 								netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
 								                        struct dp_packet *packet,
 								                        const struct ovs_action_push_tnl *data)
 								{
-												userspace: Add new option srv6_flowlabel in SRv6 tunnel.

It supports flowlabel based load balancing by controlling the flowlabel
of outer IPv6 header, which is already implemented in Linux kernel as
seg6_flowlabel sysctl [1].

[1]: https://docs.kernel.org/networking/seg6-sysctl.html

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:23 +09:00
+								    struct ovs_16aligned_ip6_hdr *inner_ip6, *outer_ip6;
 								    enum netdev_srv6_flowlabel srv6_flowlabel;
 								    ovs_be32 ipv6_label = 0;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    int ip_tot_size;
-												userspace: Add new option srv6_flowlabel in SRv6 tunnel.

It supports flowlabel based load balancing by controlling the flowlabel
of outer IPv6 header, which is already implemented in Linux kernel as
seg6_flowlabel sysctl [1].

[1]: https://docs.kernel.org/networking/seg6-sysctl.html

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:23 +09:00
+								    uint32_t flow;
 								    inner_ip6 = dp_packet_l3(packet);
 								    outer_ip6 = netdev_tnl_ipv6_hdr((void *) data->header);
 								    srv6_flowlabel = ntohl(get_16aligned_be32(&outer_ip6->ip6_flow)) &
 								                     IPV6_LABEL_MASK;
 								    switch (srv6_flowlabel) {
 								    case SRV6_FLOWLABEL_COPY:
 								        flow = ntohl(get_16aligned_be32(&inner_ip6->ip6_flow));
 								        ipv6_label = (flow >> 28) == 6 ? htonl(flow & IPV6_LABEL_MASK) : 0;
 								        break;
 								    case SRV6_FLOWLABEL_ZERO:
 								        ipv6_label = 0;
 								        break;
 								    case SRV6_FLOWLABEL_COMPUTE:
 								        ipv6_label = htonl(dp_packet_get_rss_hash(packet) & IPV6_LABEL_MASK);
 								        break;
 								    }
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
-												userspace: Add new option srv6_flowlabel in SRv6 tunnel.

It supports flowlabel based load balancing by controlling the flowlabel
of outer IPv6 header, which is already implemented in Linux kernel as
seg6_flowlabel sysctl [1].

[1]: https://docs.kernel.org/networking/seg6-sysctl.html

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-23 12:58:23 +09:00
+								    netdev_tnl_push_ip_header(packet, data->header,
 								                              data->header_len, &ip_tot_size, ipv6_label);
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								}
 								struct dp_packet *
 								netdev_srv6_pop_header(struct dp_packet *packet)
 								{
 								    const struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
 								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    const struct ip6_rt_hdr *rt_hdr;
 								    uint8_t nw_proto = nh->ip6_nxt;
 								    const void *data = nh + 1;
 								    uint8_t nw_frag = 0;
 								    unsigned int hlen;
-												netdev-native-tnl: Fix Coverity integer overflows report.

Fixed potential integer overflow in netdev_srv6_pop_header(),
by making sure the packet length does at least account for
the IPv6 header.

Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-28 15:28:39 +02:00
+								    size_t size;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
 								    /*
 								     * Verifies that the routing header is present in the IPv6
 								     * extension headers and that its type is SRv6.
 								     */
-												netdev-native-tnl: Fix Coverity integer overflows report.

Fixed potential integer overflow in netdev_srv6_pop_header(),
by making sure the packet length does at least account for
the IPv6 header.

Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-28 15:28:39 +02:00
+								    size = dp_packet_l3_size(packet);
 								    if (size < IPV6_HEADER_LEN) {
 								        goto err;
 								    }
 								    size -= IPV6_HEADER_LEN;
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See the spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
+								    if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
 								                             NULL, &rt_hdr)) {
 								        goto err;
 								    }
 								    if (!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4) {
 								        goto err;
 								    }
 								    if (rt_hdr->segments_left > 0) {
 								        VLOG_WARN_RL(&err_rl, "invalid srv6 segments_left=%d\n",
 								                     rt_hdr->segments_left);
 								        goto err;
 								    }
 								    if (rt_hdr->nexthdr == IPPROTO_IPIP) {
 								        packet->packet_type = htonl(PT_IPV4);
 								    } else if (rt_hdr->nexthdr == IPPROTO_IPV6) {
 								        packet->packet_type = htonl(PT_IPV6);
 								    } else {
 								        goto err;
 								    }
 								    pkt_metadata_init_tnl(md);
-												netdev-native-tnl: Do not validate already checked checksum.

Bad packets were still being validated in software when decapsulating
an IP header. Trust the decision taken wrt IP checksum offloading (checking
dp_packet_hwol_l3_csum_ipv4_ol()) and avoid revalidating a known
bad checksum.

While at it, add coverage counters so that checksum validation impact
can be monitored, and unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-03-13 13:43:35 +01:00
+								    if (!ip_extract_tnl_md(packet, tnl, &hlen)) {
-												netdev-native-tnl: Fix use of uninitialized offset on SRv6 header pop.

Clang's static analyzer will complain about uninitialized value 'hlen'
because we weren't properly checking the error code from a function that
would have initialized the value.

Instead, add a check for that return code.

Fixes: 03fc1ad78521 ("userspace: Add SRv6 tunnel support.")
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-05-27 15:08:42 -04:00
+								        goto err;
 								    }
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes the OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and leaves the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, and an unknown checksum belongs
either to an IPv6 packet or, if it was IPv4, OVS updated it too (keeping
it good or bad accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_pop(packet, hlen);
-												userspace: Add SRv6 tunnel support.

SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See the spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-29 14:51:17 +09:00
 								    return packet;
 								err:
 								    dp_packet_delete(packet);
 								    return NULL;
 								}
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation
to retain a packet, but STT can keep a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs to pass this number-of-packets parameter
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								struct dp_packet *
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								netdev_vxlan_pop_header(struct dp_packet *packet)
 								{
 								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    struct vxlanhdr *vxh;
 								    unsigned int hlen;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								    ovs_be32 vx_flags;
 								    enum packet_type next_pt = PT_ETH;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Add assertion in vxlan_pop_header.

During tunnel decapsulation the below steps are performed:
 [1] Tunnel information is populated in packet metadata i.e packet->md->tunnel.
 [2] Outer header gets popped.
 [3] Packet is recirculated.

For [1] to work, the dp_packet L3 and L4 header offsets should be valid.
The offsets in the dp_packet are set as part of miniflow extraction.

If offsets are accidentally reset (or) the pop header operation is performed
prior to miniflow extraction, step [1] fails silently and creates
issues that are harder to debug. Add the assertion to check if the
offsets are valid.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-12 17:43:13 +00:00
+								    ovs_assert(packet->l3_ofs > 0);
 								    ovs_assert(packet->l4_ofs > 0);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    pkt_metadata_init_tnl(md);
 								    if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
-												netdev: Return number of packet from netdev_pop_header()

Current tunnel-pop API does not allow the netdev implementation
retain a packet but STT can keep a packet from batch of packets
during TCP reassembly processing. To return exact count of
valid packet STT need to pass this number of packet parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    vxh = udp_extract_tnl_md(packet, tnl, &hlen);
 								    if (!vxh) {
-												netdev: Return number of packet from netdev_pop_header()

Current tunnel-pop API does not allow the netdev implementation
retain a packet but STT can keep a packet from batch of packets
during TCP reassembly processing. To return exact count of
valid packet STT need to pass this number of packet parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								    vx_flags = get_16aligned_be32(&vxh->vx_flags);
 								    if (vx_flags & htonl(VXLAN_HF_GPE)) {
 								        vx_flags &= htonl(~VXLAN_GPE_USED_BITS);
 								        /* Drop the OAM packets */
 								        if (vxh->vx_gpe.flags & VXLAN_GPE_FLAGS_O) {
 								            goto err;
 								        }
 								        switch (vxh->vx_gpe.next_protocol) {
 								        case VXLAN_GPE_NP_IPV4:
 								            next_pt = PT_IPV4;
 								            break;
 								        case VXLAN_GPE_NP_IPV6:
 								            next_pt = PT_IPV6;
 								            break;
-												userspace: add NSH support to vxlan-gpe tunnels

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-08-05 13:41:10 +08:00
+								        case VXLAN_GPE_NP_NSH:
 								            next_pt = PT_NSH;
 								            break;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								        case VXLAN_GPE_NP_ETHERNET:
 								            next_pt = PT_ETH;
 								            break;
 								        default:
 								            goto err;
 								        }
 								    }
 								    if (vx_flags != htonl(VXLAN_FLAGS) ||
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								       (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
 								        VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								                     ntohl(vx_flags),
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								                     ntohl(get_16aligned_be32(&vxh->vx_vni)));
-												netdev: Return number of packet from netdev_pop_header()

Current tunnel-pop API does not allow the netdev implementation
retain a packet but STT can keep a packet from batch of packets
during TCP reassembly processing. To return exact count of
valid packet STT need to pass this number of packet parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
 								    tnl->flags |= FLOW_TNL_F_KEY;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								    packet->packet_type = htonl(next_pt);
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated by kept as bad by OVS, an unknown checksum is either
an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad
accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_pop(packet, hlen + VXLAN_HLEN);
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								    if (next_pt != PT_ETH) {
 								        packet->l3_ofs = 0;
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev: Return number of packet from netdev_pop_header()

Current tunnel-pop API does not allow the netdev implementation
retain a packet but STT can keep a packet from batch of packets
during TCP reassembly processing. To return exact count of
valid packet STT need to pass this number of packet parameter
as a reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								    return packet;
 								err:
 								    dp_packet_delete(packet);
 								    return NULL;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								}
 								int
 								netdev_vxlan_build_header(const struct netdev *netdev,
 								                          struct ovs_action_push_tnl *data,
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-ative-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								                          const struct netdev_tnl_build_header_params *params)
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								{
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    const struct netdev_tunnel_config *tnl_cfg;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    struct vxlanhdr *vxh;
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    tnl_cfg = netdev_get_tunnel_config(netdev);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling build tunnel header code is spread across
two different modules, it makes pretty hard to follow the code.
Following patch refactors the code to move all code to
netdev-ative-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    vxh = udp_build_header(tnl_cfg, data, params);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								    if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
 								        put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS | VXLAN_HF_GPE));
 								        put_16aligned_be32(&vxh->vx_vni,
 								                           htonl(ntohll(params->flow->tunnel.tun_id) << 8));
-												userspace: Handling of versatile tunnel ports

In netdev_gre_build_header(), GRE protocol and VXLAN next_potocol is set based
on packet_type of flow. If it's about an Ethernet packet, it is set to
ETP_TYPE_TEB. Otherwise, if the name space is OFPHTN_ETHERNET, it is set
according to the name space type.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:59 +00:00
+								        if (params->flow->packet_type == htonl(PT_ETH)) {
 								            vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_ETHERNET;
 								        } else if (pt_ns(params->flow->packet_type) == OFPHTN_ETHERTYPE) {
 								            switch (pt_ns_type(params->flow->packet_type)) {
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								            case ETH_TYPE_IP:
 								                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_IPV4;
 								                break;
 								            case ETH_TYPE_IPV6:
 								                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_IPV6;
 								                break;
-												userspace: add NSH support to vxlan-gpe tunnels

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-08-05 13:41:10 +08:00
+								            case ETH_TYPE_NSH:
 								                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_NSH;
 								                break;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								            case ETH_TYPE_TEB:
 								                vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_ETHERNET;
 								                break;
-												userspace: Handling of versatile tunnel ports

In netdev_gre_build_header(), GRE protocol and VXLAN next_potocol is set based
on packet_type of flow. If it's about an Ethernet packet, it is set to
ETP_TYPE_TEB. Otherwise, if the name space is OFPHTN_ETHERNET, it is set
according to the name space type.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:59 +00:00
+								            default:
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								                return EINVAL;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
from Yi Yang. It introduces an extension option "gpe" to the vxlan port in the
netdev-dpdk datapath. Description of vxlan gpe protocoll was added to header
file lib/packets.h. In the vxlan specific methods the different packet are
introduced and handled.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								            }
 								        } else {
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								            return EINVAL;
-												userspace: add vxlan gpe support to vport

This patch is based on the "datapath: enable vxlangpe creation in compat mode"
patch from Yi Yang. It introduces an extension option "gpe" for the vxlan port
in the netdev-dpdk datapath. A description of the vxlan gpe protocol was added
to the header file lib/packets.h. The vxlan-specific methods now recognize and
handle the different packet types.

Added VXLAN GPE tunnel push test.

Signed-off-by: Yi Yang <yi.y.yang at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:26 +00:00
+								        }
 								    } else {
 								        put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
 								        put_16aligned_be32(&vxh->vx_vni,
 								                           htonl(ntohll(params->flow->tunnel.tun_id) << 8));
 								    }
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    data->header_len += sizeof *vxh;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    data->tnl_type = OVS_VPORT_TYPE_VXLAN;
 								    return 0;
 								}
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								struct dp_packet *
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								netdev_geneve_pop_header(struct dp_packet *packet)
 								{
 								    struct pkt_metadata *md = &packet->md;
 								    struct flow_tnl *tnl = &md->tunnel;
 								    struct genevehdr *gnh;
 								    unsigned int hlen, opts_len, ulen;
 								    pkt_metadata_init_tnl(md);
 								    if (GENEVE_BASE_HLEN > dp_packet_l4_size(packet)) {
 								        VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%"PRIuSIZE"\n",
 								                     (unsigned int)GENEVE_BASE_HLEN, dp_packet_l4_size(packet));
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    gnh = udp_extract_tnl_md(packet, tnl, &ulen);
 								    if (!gnh) {
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    opts_len = gnh->opt_len * 4;
 								    hlen = ulen + GENEVE_BASE_HLEN + opts_len;
 								    if (hlen > dp_packet_size(packet)) {
 								        VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
 								                     hlen, dp_packet_size(packet));
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    if (gnh->ver != 0) {
 								        VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
 								        VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
 								                     ntohs(gnh->proto_type));
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								        goto err;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    }
 								    tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
 								    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
 								    tnl->flags |= FLOW_TNL_F_KEY;
 								    memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
 								    tnl->metadata.present.len = opts_len;
 								    tnl->flags |= FLOW_TNL_F_UDPIF;
-												userspace: L3 tunnel support for GRE and LISP

Add a boolean "layer3" configuration option for tunnel vports.
The layer3 option defaults to false for all ports except LISP.
GRE ports accept both true and false for "layer3".

A tunnel vport configured with layer3=true receives L3 packets,
which are then converted to Ethernet packets by pushing a dummy
Ethernet header at the ingress of the OpenFlow pipeline. The
Ethernet header of a packet is stripped before sending it to a
layer3 tunnel vport.

Presently a single GRE vport cannot carry both L2 and L3 packets.
But it is possible to create two GRE vports representing the same
GRE tunnel, one with layer3=false, the other with layer3=true.
L2 packets from the tunnel are received on the first vport, L3
packets on the second. The controller must send packets to the
layer3 GRE vport to tunnel them without their Ethernet header.

Unit tests have been added to check the L3 tunnel handling.

LISP tunnels are not yet supported by the netdev userspace datapath.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:21 +00:00
+								    packet->packet_type = htonl(PT_ETH);
-												dp-packet: Rework IP checksum offloads.

As the packet traverses through OVS, offloading Tx flags must be carefully
evaluated and updated which results in a bit of complexity because of a
separate "outer" Tx offloading flag coming from DPDK API,
and a "normal"/"inner" Tx offloading flag.

On the other hand, the DPDK mbuf API specifies 4 status when it comes to
IP checksums:
- RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
- RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
- RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
- RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the
  packet data, but the integrity of the IP header is verified.

This patch changes OVS API so that OVS code only tracks the status of
the checksum of the "current" L3 header and let the Tx flags aspect to
the netdev-* implementations.

With this API, the flow extraction can be cleaned up.

During packet processing, OVS can simply look for the IP checksum validity
(either good, or partial) before changing some IP header, and then mark
the checksum as partial.

In the conntrack case, when natting packets, the checksum status of the
inner part (ICMP error case) must be forced temporarily as unknown
to force checksum resolution.

When tunneling comes into play, IP checksums status is bit-shifted for
future considerations in the processing if, for example, the tunnel
header gets decapsulated again, or in the netdev-* implementations that
support tunnel offloading.

Finally, netdev-* implementations only need to care about packets in
partial status: a good checksum does not need touching, a bad checksum
has been updated but kept as bad by OVS, and an unknown checksum is either
an IPv6 one or, if it was an IPv4 one, OVS updated it too (keeping it good
or bad accordingly).

Rename current API for consistency with dp_packet_(inner_)?ip_checksum_.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-06-17 09:20:57 +02:00
+								    tnl_ol_pop(packet, hlen);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev: Return number of packet from netdev_pop_header()

The current tunnel-pop API does not allow the netdev implementation to
retain a packet, but STT can hold back a packet from a batch of packets
during TCP reassembly processing. To return the exact count of
valid packets, STT needs the number-of-packets parameter to be passed
by reference.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:32:06 -07:00
+								    return packet;
 								err:
 								    dp_packet_delete(packet);
 								    return NULL;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								}
 								int
 								netdev_geneve_build_header(const struct netdev *netdev,
 								                           struct ovs_action_push_tnl *data,
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								                           const struct netdev_tnl_build_header_params *params)
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								{
 								    struct genevehdr *gnh;
 								    int opt_len;
 								    bool crit_opt;
-												netdev-vport: RCU-fy tunnel config.

Tunnel config can be accessed by multiple threads at the same time and
it is supposed to be protected by the netdev_vport mutex.  However,
many functions are getting direct access to it via netdev API without
taking the mutex, creating a potential for various race conditions.

Fix that by protecting the tunnel config with RCU.  The whole structure
is replaced on configuration changes.  Individual fields are never
updated and the structure itself is constant.  This way it can be safely
used by different threads within RCU grace period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-20 01:35:26 +02:00
+								    gnh = udp_build_header(netdev_get_tunnel_config(netdev), data, params);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    put_16aligned_be32(&gnh->vni, htonl(ntohll(params->flow->tunnel.tun_id) << 8));
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    opt_len = tun_metadata_to_geneve_header(&params->flow->tunnel,
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								                                            gnh->options, &crit_opt);
 								    gnh->opt_len = opt_len / 4;
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    gnh->oam = !!(params->flow->tunnel.flags & FLOW_TNL_F_OAM);
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    gnh->critical = crit_opt ? 1 : 0;
 								    gnh->proto_type = htons(ETH_TYPE_TEB);
-												netdev-native-tnl: Introduce ip_build_header()

The native tunneling code that builds tunnel headers is spread across
two different modules, which makes the code pretty hard to follow.
This patch refactors it so that all of the code lives in the
netdev-native-tnl module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-23 20:27:14 -07:00
+								    data->header_len += sizeof *gnh + opt_len;
-												netdev-vport: Factor-out tunnel Push-pop code into separate module.

It is better to move tunnel push-pop action specific functions into
separate module.

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>

											
										
										
											2016-05-17 17:31:33 -07:00
+								    data->tnl_type = OVS_VPORT_TYPE_GENEVE;
 								    return 0;
 								}
 								/* Parses 's' as a decimal UDP port number.
 								 *
 								 * On success, stores the value in '*port' and returns true.  Returns false,
 								 * leaving '*port' untouched, if 's' is not a complete decimal integer or if
 								 * its value falls outside 1..UINT16_MAX. */
 								static bool
 								tnl_parse_udp_port(const char *s, int *port)
 								{
 								    char *end;
 								    long value;
 								    errno = 0;
 								    value = strtol(s, &end, 10);
 								    /* Unlike atoi(), reject empty input, trailing garbage and overflow. */
 								    if (end == s || *end != '\0' || errno == ERANGE) {
 								        return false;
 								    }
 								    if (value <= 0 || value > UINT16_MAX) {
 								        return false;
 								    }
 								    *port = value;
 								    return true;
 								}
 								/* unixctl command handler for the tunnel UDP source port range.
 								 *
 								 * With fewer than two arguments (argc < 3), replies with the current
 								 * 'tnl_udp_port_min'-'tnl_udp_port_max' range and changes nothing.
 								 *
 								 * With exactly two arguments, each is parsed as a port in 1..UINT16_MAX.
 								 * The smaller value is stored in 'tnl_udp_port_min' and the larger in
 								 * 'tnl_udp_port_max', so the arguments may be given in either order; then
 								 * 'tnl_conf_seq' is changed and "OK" is sent back.  If either argument is
 								 * not a valid port, replies "Invalid min." or "Invalid max." and leaves the
 								 * range untouched. */
 								void
 								netdev_tnl_egress_port_range(struct unixctl_conn *conn, int argc,
 								                             const char *argv[], void *aux OVS_UNUSED)
 								{
 								    int val1, val2;
 								    if (argc < 3) {
 								        struct ds ds = DS_EMPTY_INITIALIZER;
 								        ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
 								                            tnl_udp_port_min, tnl_udp_port_max);
 								        unixctl_command_reply(conn, ds_cstr(&ds));
 								        ds_destroy(&ds);
 								        return;
 								    }
 								    if (argc != 3) {
 								        /* NOTE(review): this path used to return without any reply, which
 								         * presumably leaves the requester waiting; always answer. */
 								        unixctl_command_reply(conn, "Expected exactly two arguments.");
 								        return;
 								    }
 								    if (!tnl_parse_udp_port(argv[1], &val1)) {
 								        unixctl_command_reply(conn, "Invalid min.");
 								        return;
 								    }
 								    if (!tnl_parse_udp_port(argv[2], &val2)) {
 								        unixctl_command_reply(conn, "Invalid max.");
 								        return;
 								    }
 								    /* Accept the bounds in either order. */
 								    if (val1 > val2) {
 								        tnl_udp_port_min = val2;
 								        tnl_udp_port_max = val1;
 								    } else {
 								        tnl_udp_port_min = val1;
 								        tnl_udp_port_max = val2;
 								    }
 								    /* Signal the configuration change through 'tnl_conf_seq'. */
 								    seq_change(tnl_conf_seq);
 								    unixctl_command_reply(conn, "OK");
 								}