2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00
ovs/lib/dp-packet.h

1508 lines
44 KiB
C
Raw Normal View History

/*
* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DPBUF_H
#define DPBUF_H 1
#include <stddef.h>
#include <stdint.h>
#ifdef DPDK_NETDEV
#include <rte_config.h>
#include <rte_mbuf.h>
#endif
#include "csum.h"
#include "netdev-afxdp.h"
#include "netdev-dpdk.h"
#include "openvswitch/list.h"
#include "packets.h"
#include "util.h"
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as preparation for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow version 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is an IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet with the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
#include "flow.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Where the memory backing a dp_packet's buffer came from.  The source
 * dictates how the buffer may be resized and how it must be freed (see
 * dp_packet_delete()). */
enum OVS_PACKED_ENUM dp_packet_source {
    DPBUF_MALLOC,              /* Obtained via malloc(). */
    DPBUF_STACK,               /* Un-movable stack space or static buffer. */
    DPBUF_STUB,                /* Starts on stack, may expand into heap. */
    DPBUF_DPDK,                /* buffer data is from DPDK allocated memory.
                                * ref to dp_packet_init_dpdk() in dp-packet.c.
                                */
    DPBUF_AFXDP,               /* Buffer data from XDP frame. */
};
/* Size in bytes of the per-packet metadata area embedded in every dp_packet
 * (the union holding 'struct pkt_metadata md' at the end of the struct). */
#define DP_PACKET_CONTEXT_SIZE 64

/* Defines an offload flag so that it aliases the corresponding DPDK mbuf
 * flag when built with DPDK, and takes a generic value otherwise. */
#ifdef DPDK_NETDEV
#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF
#else
#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF
#endif

/* NOTE(review): defined as 0 here; presumably ORed with supported flags
 * elsewhere in the build — confirm against dp-packet.c / netdev code. */
#define DP_PACKET_OL_SUPPORTED_MASK 0
/* Bit masks for the 'offloads' member of the 'dp_packet' structure.
 *
 * The low 16 bits describe the outermost headers; shifting an outer flag
 * left by DP_PACKET_OL_SHIFT_COUNT yields the matching inner (post-tunnel)
 * flag in the high 16 bits. */
enum OVS_PACKED_ENUM dp_packet_offload_mask {
    /* Bad IP checksum in the packet. */
    DP_PACKET_OL_IP_CKSUM_BAD = UINT16_C(1) << 4,
    /* Valid IP checksum in the packet. */
    DP_PACKET_OL_IP_CKSUM_GOOD = UINT16_C(1) << 7,
    /* Bad L4 checksum in the packet. */
    DP_PACKET_OL_L4_CKSUM_BAD = UINT16_C(1) << 3,
    /* Valid L4 checksum in the packet. */
    DP_PACKET_OL_L4_CKSUM_GOOD = UINT16_C(1) << 8,
    /* Protocol corresponding to above L4 checksums. */
    DP_PACKET_OL_L4_PROTO_TCP = UINT16_C(1) << 9,
    DP_PACKET_OL_L4_PROTO_UDP = UINT16_C(1) << 10,
    /* Bits for marking a packet as tunneled. */
    DP_PACKET_OL_TUNNEL_GENEVE = UINT16_C(1) << 11,
    DP_PACKET_OL_TUNNEL_VXLAN = UINT16_C(1) << 12,

/* Distance in bits between an outer offload flag and its inner twin. */
#define DP_PACKET_OL_SHIFT_COUNT 16

    /* Inner offloads. */
    DP_PACKET_OL_INNER_IP_CKSUM_BAD =
        DP_PACKET_OL_IP_CKSUM_BAD << DP_PACKET_OL_SHIFT_COUNT,
    DP_PACKET_OL_INNER_IP_CKSUM_GOOD =
        DP_PACKET_OL_IP_CKSUM_GOOD << DP_PACKET_OL_SHIFT_COUNT,
    DP_PACKET_OL_INNER_L4_CKSUM_BAD =
        DP_PACKET_OL_L4_CKSUM_BAD << DP_PACKET_OL_SHIFT_COUNT,
    DP_PACKET_OL_INNER_L4_CKSUM_GOOD =
        DP_PACKET_OL_L4_CKSUM_GOOD << DP_PACKET_OL_SHIFT_COUNT,
    DP_PACKET_OL_INNER_L4_PROTO_TCP =
        DP_PACKET_OL_L4_PROTO_TCP << DP_PACKET_OL_SHIFT_COUNT,
    DP_PACKET_OL_INNER_L4_PROTO_UDP =
        DP_PACKET_OL_L4_PROTO_UDP << DP_PACKET_OL_SHIFT_COUNT,
};
#ifdef DPDK_NETDEV
/* The Rx checksum flags must stay bit-for-bit identical to DPDK's mbuf
 * flags so that mbuf offload state can be used without translation. */
BUILD_ASSERT_DECL(DP_PACKET_OL_IP_CKSUM_BAD == RTE_MBUF_F_RX_IP_CKSUM_BAD);
BUILD_ASSERT_DECL(DP_PACKET_OL_IP_CKSUM_GOOD == RTE_MBUF_F_RX_IP_CKSUM_GOOD);
BUILD_ASSERT_DECL(DP_PACKET_OL_L4_CKSUM_BAD == RTE_MBUF_F_RX_L4_CKSUM_BAD);
BUILD_ASSERT_DECL(DP_PACKET_OL_L4_CKSUM_GOOD == RTE_MBUF_F_RX_L4_CKSUM_GOOD);
#endif

/* Convenience masks covering both the GOOD and BAD (or TCP and UDP, or
 * GENEVE and VXLAN) bits of each outer offload pair. */
#define DP_PACKET_OL_IP_CKSUM_MASK (DP_PACKET_OL_IP_CKSUM_GOOD \
                                    | DP_PACKET_OL_IP_CKSUM_BAD)
#define DP_PACKET_OL_L4_CKSUM_MASK (DP_PACKET_OL_L4_CKSUM_GOOD \
                                    | DP_PACKET_OL_L4_CKSUM_BAD)
#define DP_PACKET_OL_TUNNEL_MASK (DP_PACKET_OL_TUNNEL_GENEVE \
                                  | DP_PACKET_OL_TUNNEL_VXLAN)
#define DP_PACKET_OL_L4_PROTO_MASK (DP_PACKET_OL_L4_PROTO_TCP \
                                    | DP_PACKET_OL_L4_PROTO_UDP)

/* Same convenience masks for the inner (post-tunnel) offload bits. */
#define DP_PACKET_OL_INNER_IP_CKSUM_MASK (DP_PACKET_OL_INNER_IP_CKSUM_GOOD \
                                          | DP_PACKET_OL_INNER_IP_CKSUM_BAD)
#define DP_PACKET_OL_INNER_L4_CKSUM_MASK (DP_PACKET_OL_INNER_L4_CKSUM_GOOD \
                                          | DP_PACKET_OL_INNER_L4_CKSUM_BAD)
#define DP_PACKET_OL_INNER_L4_PROTO_MASK (DP_PACKET_OL_INNER_L4_PROTO_TCP \
                                          | DP_PACKET_OL_INNER_L4_PROTO_UDP)
/* Buffer for holding packet data. A dp_packet is automatically reallocated
* as necessary if it grows too large for the available memory.
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as prepration for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow verion 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is a IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet whith the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
* By default the packet type is set to Ethernet (PT_ETH).
*/
struct dp_packet {
#ifdef DPDK_NETDEV
struct rte_mbuf mbuf; /* DPDK mbuf */
#else
void *base_; /* First byte of allocated space. */
uint16_t allocated_; /* Number of bytes allocated. */
uint16_t data_ofs; /* First byte actually in use. */
uint32_t size_; /* Number of bytes in use. */
uint32_t ol_flags; /* Offloading flags. */
uint32_t rss_hash; /* Packet hash. */
uint32_t flow_mark; /* Packet flow mark. */
uint16_t tso_segsz; /* TCP segment size. */
#endif
enum dp_packet_source source; /* Source of memory allocated as 'base'. */
bool has_hash; /* Is the 'rss_hash' valid? */
bool has_mark; /* Is the 'flow_mark' valid? */
/* All the following elements of this struct are copied in a single call
* of memcpy in dp_packet_clone_with_headroom. */
uint16_t l2_pad_size; /* Detected l2 padding size.
* Padding is non-pullable. */
uint16_t l2_5_ofs; /* MPLS label stack offset, or UINT16_MAX */
uint16_t l3_ofs; /* Network-level header offset,
* or UINT16_MAX. */
uint16_t l4_ofs; /* Transport-level header offset,
or UINT16_MAX. */
uint16_t inner_l3_ofs; /* Inner Network-level header offset,
* or UINT16_MAX. */
uint16_t inner_l4_ofs; /* Inner Transport-level header offset,
or UINT16_MAX. */
uint32_t cutlen; /* length in bytes to cut from the end. */
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as prepration for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow verion 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is a IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet whith the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
ovs_be32 packet_type; /* Packet type as defined in OpenFlow */
enum OVS_PACKED_ENUM dp_packet_offload_mask offloads;
/* Checksums status and offloads. */
union {
struct pkt_metadata md;
uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
};
};
BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct dp_packet, offloads)
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
== sizeof(uint32_t));
#if HAVE_AF_XDP
/* AF_XDP wrapper for a dp_packet: pairs the packet with the umem pool its
 * XDP frame came from, so the frame can be returned on free. */
struct dp_packet_afxdp {
    struct umem_pool *mpool;    /* Pool owning the packet's XDP frame. */
    struct dp_packet packet;    /* The packet itself; must be usable as a
                                 * plain struct dp_packet. */
};
#endif
/* Basic buffer accessors. */
static inline void *dp_packet_data(const struct dp_packet *);
static inline void dp_packet_set_data(struct dp_packet *, void *);
static inline void *dp_packet_base(const struct dp_packet *);
static inline void dp_packet_set_base(struct dp_packet *, void *);
static inline uint32_t dp_packet_size(const struct dp_packet *);
static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);

void *dp_packet_resize_l2(struct dp_packet *, int increment);
void *dp_packet_resize_l2_5(struct dp_packet *, int increment);

/* Protocol layer offsets and payload accessors. */
static inline void *dp_packet_eth(const struct dp_packet *);
static inline void dp_packet_reset_outer_offsets(struct dp_packet *);
static inline void dp_packet_reset_offsets(struct dp_packet *);
static inline void dp_packet_reset_offload(struct dp_packet *);
static inline uint16_t dp_packet_l2_pad_size(const struct dp_packet *);
static inline void dp_packet_set_l2_pad_size(struct dp_packet *, uint16_t);
static inline void *dp_packet_l2_5(const struct dp_packet *);
static inline void dp_packet_set_l2_5(struct dp_packet *, void *);
static inline void *dp_packet_l3(const struct dp_packet *);
static inline void dp_packet_set_l3(struct dp_packet *, void *);
static inline void *dp_packet_l4(const struct dp_packet *);
static inline void dp_packet_set_l4(struct dp_packet *, void *);
static inline size_t dp_packet_l4_size(const struct dp_packet *);
static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *);
static inline const void *dp_packet_get_udp_payload(const struct dp_packet *);
static inline const void *dp_packet_get_sctp_payload(const struct dp_packet *);
static inline const void *dp_packet_get_icmp_payload(const struct dp_packet *);
static inline const void *dp_packet_get_nd_payload(const struct dp_packet *);

/* Initialization, cloning and teardown. */
void dp_packet_use(struct dp_packet *, void *, size_t);
void dp_packet_use_stub(struct dp_packet *, void *, size_t);
void dp_packet_use_const(struct dp_packet *, const void *, size_t);
#if HAVE_AF_XDP
void dp_packet_use_afxdp(struct dp_packet *, void *, size_t, size_t);
#endif
void dp_packet_init_dpdk(struct dp_packet *);
void dp_packet_init(struct dp_packet *, size_t);
void dp_packet_uninit(struct dp_packet *);
struct dp_packet *dp_packet_new(size_t);
struct dp_packet *dp_packet_new_with_headroom(size_t, size_t headroom);
struct dp_packet *dp_packet_clone(const struct dp_packet *);
struct dp_packet *dp_packet_clone_with_headroom(const struct dp_packet *,
                                                size_t headroom);
struct dp_packet *dp_packet_clone_data(const void *, size_t);
struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t,
                                                     size_t headroom);
void dp_packet_resize(struct dp_packet *b, size_t new_headroom,
                      size_t new_tailroom);
static inline void dp_packet_delete(struct dp_packet *);
static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *);

/* Appending, prepending and trimming data. */
static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
                                 size_t size);
static inline void *dp_packet_at_assert(const struct dp_packet *,
                                        size_t offset, size_t size);
static inline void *dp_packet_tail(const struct dp_packet *);
static inline void *dp_packet_end(const struct dp_packet *);
void *dp_packet_put_uninit(struct dp_packet *, size_t);
void *dp_packet_put_zeros(struct dp_packet *, size_t);
void *dp_packet_put(struct dp_packet *, const void *, size_t);
char *dp_packet_put_hex(struct dp_packet *, const char *s, size_t *n);
void dp_packet_reserve(struct dp_packet *, size_t);
void dp_packet_reserve_with_tailroom(struct dp_packet *, size_t headroom,
                                     size_t tailroom);
void *dp_packet_push_uninit(struct dp_packet *, size_t);
void *dp_packet_push_zeros(struct dp_packet *, size_t);
void *dp_packet_push(struct dp_packet *, const void *, size_t);
static inline size_t dp_packet_headroom(const struct dp_packet *);
static inline size_t dp_packet_tailroom(const struct dp_packet *);
void dp_packet_prealloc_headroom(struct dp_packet *, size_t);
void dp_packet_prealloc_tailroom(struct dp_packet *, size_t);
void dp_packet_shift(struct dp_packet *, int);
static inline void dp_packet_clear(struct dp_packet *);
static inline void *dp_packet_pull(struct dp_packet *, size_t);
static inline void *dp_packet_try_pull(struct dp_packet *, size_t);
void *dp_packet_steal_data(struct dp_packet *);
static inline bool dp_packet_equal(const struct dp_packet *,
                                   const struct dp_packet *);
bool dp_packet_compare_offsets(struct dp_packet *good,
                               struct dp_packet *test,
                               struct ds *err_str);
void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t);
/* Frees memory that 'b' points to, as well as 'b' itself.  The release
 * mechanism depends on where the buffer memory came from. */
static inline void
dp_packet_delete(struct dp_packet *b)
{
    if (!b) {
        return;
    }

    switch (b->source) {
    case DPBUF_DPDK:
        free_dpdk_buf(b);
        break;
    case DPBUF_AFXDP:
        free_afxdp_buf(b);
        break;
    default:
        /* DPBUF_MALLOC, DPBUF_STACK or DPBUF_STUB. */
        dp_packet_uninit(b);
#ifdef DPDK_NETDEV
        free_cacheline(b);
#else
        free(b);
#endif
        break;
    }
}
/* Swaps content of two packets. */
static inline void
dp_packet_swap(struct dp_packet *a, struct dp_packet *b)
{
ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB);
ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB);
struct dp_packet c = *a;
*a = *b;
*b = c;
}
/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer
 * to byte 'offset'.  Otherwise, returns a null pointer. */
static inline void *
dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
{
    if (offset + size > dp_packet_size(b)) {
        return NULL;
    }
    return (char *) dp_packet_data(b) + offset;
}
/* Returns a pointer to byte 'offset' in 'b', which must contain at least
 * 'offset + size' bytes of data; aborts otherwise. */
static inline void *
dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
{
    ovs_assert(offset + size <= dp_packet_size(b));

    char *head = dp_packet_data(b);
    return head + offset;
}
/* Returns a pointer to byte following the last byte of data in use in 'b'. */
static inline void *
dp_packet_tail(const struct dp_packet *b)
{
    char *head = dp_packet_data(b);
    return head + dp_packet_size(b);
}
/* Returns a pointer to byte following the last byte allocated for use (but
 * not necessarily in use) in 'b'. */
static inline void *
dp_packet_end(const struct dp_packet *b)
{
    char *base = dp_packet_base(b);
    return base + dp_packet_get_allocated(b);
}
/* Returns the number of bytes of headroom in 'b', that is, the number of
 * bytes of unused space in dp_packet 'b' before the data that is in use.
 * (Most commonly, the data in a dp_packet is at its beginning, and thus the
 * dp_packet's headroom is 0.) */
static inline size_t
dp_packet_headroom(const struct dp_packet *b)
{
    char *base = dp_packet_base(b);
    char *head = dp_packet_data(b);

    return head - base;
}
/* Returns the number of bytes that may be appended to the tail end of
 * dp_packet 'b' before the dp_packet must be reallocated. */
static inline size_t
dp_packet_tailroom(const struct dp_packet *b)
{
    char *tail = dp_packet_tail(b);
    char *end = dp_packet_end(b);

    return end - tail;
}
/* Clears any data from 'b': resets the data pointer to the start of the
 * buffer, drops the in-use size to zero, and invalidates all recorded
 * header offsets and offload state. */
static inline void
dp_packet_clear(struct dp_packet *b)
{
    dp_packet_set_data(b, dp_packet_base(b));
    dp_packet_set_size(b, 0);
    dp_packet_reset_offsets(b);
    dp_packet_reset_offload(b);
}
/* Removes 'size' bytes from the head end of 'b', which must contain at least
 * 'size' bytes of data (not counting any trailing L2 padding).  Returns the
 * first byte of data removed. */
static inline void *
dp_packet_pull(struct dp_packet *b, size_t size)
{
    void *head = dp_packet_data(b);

    ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size);

    dp_packet_set_data(b, (char *) head + size);
    dp_packet_set_size(b, dp_packet_size(b) - size);
    return head;
}
/* If 'b' has at least 'size' bytes of data (excluding trailing L2 padding),
 * removes that many bytes from the head end of 'b' and returns the first
 * byte removed.  Otherwise, returns a null pointer without modifying 'b'. */
static inline void *
dp_packet_try_pull(struct dp_packet *b, size_t size)
{
    if (dp_packet_size(b) - dp_packet_l2_pad_size(b) < size) {
        return NULL;
    }
    return dp_packet_pull(b, size);
}
/* Returns true if 'a' and 'b' carry identical data (same length and same
 * bytes); header offsets and metadata are not compared. */
static inline bool
dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b)
{
    size_t size = dp_packet_size(a);

    if (size != dp_packet_size(b)) {
        return false;
    }
    return memcmp(dp_packet_data(a), dp_packet_data(b), size) == 0;
}
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as prepration for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow verion 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is a IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet whith the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
/* Returns true if 'b' carries an Ethernet frame, i.e. its OpenFlow packet
 * type is PT_ETH (namespace OFPHTN_ONF, type 0). */
static inline bool
dp_packet_is_eth(const struct dp_packet *b)
{
    return b->packet_type == htonl(PT_ETH);
}
/* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2
* headers, so return NULL if it is not set. */
static inline void *
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as prepration for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow verion 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is a IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet whith the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
dp_packet_eth(const struct dp_packet *b)
{
userspace: Add packet_type in dp_packet and flow This commit adds a packet_type attribute to the structs dp_packet and flow to explicitly carry the type of the packet as prepration for the introduction of the so-called packet type-aware pipeline (PTAP) in OVS. The packet_type is a big-endian 32 bit integer with the encoding as specified in OpenFlow verion 1.5. The upper 16 bits contain the packet type name space. Pre-defined values are defined in openflow-common.h: enum ofp_header_type_namespaces { OFPHTN_ONF = 0, /* ONF namespace. */ OFPHTN_ETHERTYPE = 1, /* ns_type is an Ethertype. */ OFPHTN_IP_PROTO = 2, /* ns_type is a IP protocol number. */ OFPHTN_UDP_TCP_PORT = 3, /* ns_type is a TCP or UDP port. */ OFPHTN_IPV4_OPTION = 4, /* ns_type is an IPv4 option number. */ }; The lower 16 bits specify the actual type in the context of the name space. Only name spaces 0 and 1 will be supported for now. For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet). This is the default packet_type in OVS and the only one supported so far. Packets of type (OFPHTN_ONF, 0) are called Ethernet packets. In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet. A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet whith the Ethernet header (and any VLAN tags) removed to expose the L3 (or L2.5) payload of the packet. These will simply be called L3 packets. The Ethernet address fields dl_src and dl_dst in struct flow are not applicable for an L3 packet and must be zero. However, to maintain compatibility with the large code base, we have chosen to copy the Ethertype of an L3 packet into the the dl_type field of struct flow. This does not mean that it will be possible to match on dl_type for L3 packets with PTAP later on. Matching must be done on packet_type instead. New dp_packets are initialized with packet_type Ethernet. Ports that receive L3 packets will have to explicitly adjust the packet_type. 
Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-04-25 16:29:59 +00:00
return (dp_packet_is_eth(b) && b->l3_ofs != UINT16_MAX)
? dp_packet_data(b) : NULL;
}
/* Resets all outer layer offsets. */
static inline void
dp_packet_reset_outer_offsets(struct dp_packet *b)
{
    /* UINT16_MAX is the sentinel for "offset not set". */
    b->l2_pad_size = 0;
    b->l2_5_ofs = UINT16_MAX;
    b->l3_ofs = UINT16_MAX;
    b->l4_ofs = UINT16_MAX;
}
/* Resets all layer offsets.  'l3' offset must be set before 'l2' can be
 * retrieved. */
static inline void
dp_packet_reset_offsets(struct dp_packet *b)
{
    dp_packet_reset_outer_offsets(b);
    /* Also invalidate the inner (post-tunnel) offsets. */
    b->inner_l3_ofs = UINT16_MAX;
    b->inner_l4_ofs = UINT16_MAX;
}
/* Returns the number of bytes of L2 padding at the tail of 'b'. */
static inline uint16_t
dp_packet_l2_pad_size(const struct dp_packet *b)
{
    return b->l2_pad_size;
}
/* Records 'pad_size' bytes of L2 padding at the tail of 'b'; the padding
 * may not exceed the packet size. */
static inline void
dp_packet_set_l2_pad_size(struct dp_packet *b, uint16_t pad_size)
{
    ovs_assert(pad_size <= dp_packet_size(b));
    b->l2_pad_size = pad_size;
}
/* Returns a pointer to the L2.5 (e.g. MPLS) header of 'b', or NULL if the
 * offset is not set. */
static inline void *
dp_packet_l2_5(const struct dp_packet *b)
{
    return b->l2_5_ofs != UINT16_MAX
           ? (char *) dp_packet_data(b) + b->l2_5_ofs
           : NULL;
}
/* Records the L2.5 header position as an offset from the packet data, or
 * clears it when 'l2_5' is NULL. */
static inline void
dp_packet_set_l2_5(struct dp_packet *b, void *l2_5)
{
    b->l2_5_ofs = l2_5
                  ? (char *) l2_5 - (char *) dp_packet_data(b)
                  : UINT16_MAX;
}
/* Returns a pointer to the L3 header of 'b', or NULL if the offset is not
 * set. */
static inline void *
dp_packet_l3(const struct dp_packet *b)
{
    return b->l3_ofs != UINT16_MAX
           ? (char *) dp_packet_data(b) + b->l3_ofs
           : NULL;
}
/* Records the L3 header position as an offset from the packet data, or
 * clears it when 'l3' is NULL. */
static inline void
dp_packet_set_l3(struct dp_packet *b, void *l3)
{
    b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) : UINT16_MAX;
}
/* Returns a pointer to the L4 header of 'b', or NULL if the offset is not
 * set. */
static inline void *
dp_packet_l4(const struct dp_packet *b)
{
    return b->l4_ofs != UINT16_MAX
           ? (char *) dp_packet_data(b) + b->l4_ofs
           : NULL;
}
/* Records the L4 header position as an offset from the packet data, or
 * clears it when 'l4' is NULL. */
static inline void
dp_packet_set_l4(struct dp_packet *b, void *l4)
{
    b->l4_ofs = l4 ? (char *) l4 - (char *) dp_packet_data(b) : UINT16_MAX;
}
/* Returns the size of the packet from the beginning of the L3 header to the
 * end of the L3 payload.  Hence L2 padding is not included.  Returns 0 when
 * the L3 offset is not set. */
static inline size_t
dp_packet_l3_size(const struct dp_packet *b)
{
    return OVS_LIKELY(b->l3_ofs != UINT16_MAX)
           ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l3(b)
           - dp_packet_l2_pad_size(b)
           : 0;
}
/* Returns the size of the packet from the beginning of the L4 header to the
 * end of the L4 payload.  Hence L2 padding is not included.  Returns 0 when
 * the L4 offset is not set. */
static inline size_t
dp_packet_l4_size(const struct dp_packet *b)
{
    return OVS_LIKELY(b->l4_ofs != UINT16_MAX)
           ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l4(b)
           - dp_packet_l2_pad_size(b)
           : 0;
}
/* Returns a pointer to the inner (post-tunnel-header) L3 header of 'b', or
 * NULL if the offset is not set. */
static inline void *
dp_packet_inner_l3(const struct dp_packet *b)
{
    return b->inner_l3_ofs != UINT16_MAX
           ? (char *) dp_packet_data(b) + b->inner_l3_ofs
           : NULL;
}
/* Returns the size from the inner L3 header to the packet tail, excluding
 * L2 padding, or 0 when the inner L3 offset is not set. */
static inline size_t
dp_packet_inner_l3_size(const struct dp_packet *b)
{
    return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
           ? (const char *) dp_packet_tail(b)
           - (const char *) dp_packet_inner_l3(b)
           - dp_packet_l2_pad_size(b)
           : 0;
}
/* Returns a pointer to the inner L4 header of 'b', or NULL if the offset is
 * not set. */
static inline void *
dp_packet_inner_l4(const struct dp_packet *b)
{
    return b->inner_l4_ofs != UINT16_MAX
           ? (char *) dp_packet_data(b) + b->inner_l4_ofs
           : NULL;
}
/* Returns the size from the inner L4 header to the packet tail, excluding
 * L2 padding, or 0 when the inner L4 offset is not set. */
static inline size_t
dp_packet_inner_l4_size(const struct dp_packet *b)
{
    return OVS_LIKELY(b->inner_l4_ofs != UINT16_MAX)
           ? (const char *) dp_packet_tail(b)
           - (const char *) dp_packet_inner_l4(b)
           - dp_packet_l2_pad_size(b)
           : 0;
}
/* Returns a pointer to the TCP payload of 'b', or NULL if the L4 area is
 * shorter than a minimal TCP header or the TCP data offset is inconsistent
 * with the L4 size. */
static inline const void *
dp_packet_get_tcp_payload(const struct dp_packet *b)
{
    size_t l4_size = dp_packet_l4_size(b);
    if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
        struct tcp_header *tcp = dp_packet_l4(b);
        /* Header length in bytes, taken from the TCP data-offset field. */
        int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
        if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
            return (const char *)tcp + tcp_len;
        }
    }
    return NULL;
}
/* Same as dp_packet_get_tcp_payload() but for the inner
 * (post-tunnel-header) TCP header of 'b'. */
static inline const void *
dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
{
    size_t l4_size = dp_packet_inner_l4_size(b);
    if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
        struct tcp_header *tcp = dp_packet_inner_l4(b);
        /* Header length in bytes, taken from the TCP data-offset field. */
        int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
        if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
            return (const char *) tcp + tcp_len;
        }
    }
    return NULL;
}
/* Returns the number of TCP payload bytes in 'pkt' (payload start to tail,
 * excluding L2 padding), or 0 if the payload cannot be located. */
static inline uint32_t
dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
{
    const char *tcp_payload = dp_packet_get_tcp_payload(pkt);
    if (tcp_payload) {
        return ((char *) dp_packet_tail(pkt) - dp_packet_l2_pad_size(pkt)
                - tcp_payload);
    } else {
        return 0;
    }
}
/* Returns the UDP payload of 'b', or NULL if the L4 area is shorter than a
 * UDP header. */
static inline const void *
dp_packet_get_udp_payload(const struct dp_packet *b)
{
    return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN)
           ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
}
/* Returns the SCTP payload of 'b', or NULL if the L4 area is shorter than
 * an SCTP common header. */
static inline const void *
dp_packet_get_sctp_payload(const struct dp_packet *b)
{
    return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN)
           ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
}
/* Returns the ICMP payload of 'b', or NULL if the L4 area is shorter than
 * an ICMP header. */
static inline const void *
dp_packet_get_icmp_payload(const struct dp_packet *b)
{
    return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN)
           ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
}
/* Returns the ND (neighbor discovery) message payload of 'b', or NULL if
 * the L4 area is shorter than a fixed ND message. */
static inline const void *
dp_packet_get_nd_payload(const struct dp_packet *b)
{
    return OVS_LIKELY(dp_packet_l4_size(b) >= ND_MSG_LEN)
           ? (const char *)dp_packet_l4(b) + ND_MSG_LEN : NULL;
}
#ifdef DPDK_NETDEV
/* With DPDK, offload flags, RSS hash, and flow mark live in the embedded
 * mbuf; 'ol_flags' is 64-bit there. */
static inline uint64_t *
dp_packet_ol_flags_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint64_t *, &b->mbuf.ol_flags);
}
/* Returns a writable pointer to the RSS hash field of 'b'. */
static inline uint32_t *
dp_packet_rss_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
}
/* Returns a writable pointer to the flow mark field of 'b'. */
static inline uint32_t *
dp_packet_flow_mark_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint32_t *, &b->mbuf.hash.fdir.hi);
}
#else
/* Without DPDK, the same state is kept in plain 32-bit fields of
 * struct dp_packet. */
static inline uint32_t *
dp_packet_ol_flags_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint32_t *, &b->ol_flags);
}
/* Returns a writable pointer to the RSS hash field of 'b'. */
static inline uint32_t *
dp_packet_rss_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint32_t *, &b->rss_hash);
}
/* Returns a writable pointer to the flow mark field of 'b'. */
static inline uint32_t *
dp_packet_flow_mark_ptr(const struct dp_packet *b)
{
    return CONST_CAST(uint32_t *, &b->flow_mark);
}
#endif
#ifdef DPDK_NETDEV
/* struct dp_packet must start with the DPDK mbuf so the two can be cast
 * back and forth. */
BUILD_ASSERT_DECL(offsetof(struct dp_packet, mbuf) == 0);
static inline void
dp_packet_init_specific(struct dp_packet *p)
{
    /* This initialization is needed for packets that do not come from DPDK
     * interfaces, when vswitchd is built with --with-dpdk. */
    p->mbuf.ol_flags = p->mbuf.tx_offload = p->mbuf.packet_type = 0;
    p->mbuf.nb_segs = 1;
    p->mbuf.next = NULL;
}
/* Returns the start of the buffer backing 'b'. */
static inline void *
dp_packet_base(const struct dp_packet *b)
{
    return b->mbuf.buf_addr;
}
/* Points the buffer of 'b' at 'd'. */
static inline void
dp_packet_set_base(struct dp_packet *b, void *d)
{
    b->mbuf.buf_addr = d;
}
/* Returns the total number of bytes of packet data in 'b'. */
static inline uint32_t
dp_packet_size(const struct dp_packet *b)
{
    return b->mbuf.pkt_len;
}
/* Sets the packet data size of 'b' to 'v' bytes. */
static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
    /* netdev-dpdk does not currently support segmentation; consequently, for
     * all intents and purposes, 'data_len' (16 bit) and 'pkt_len' (32 bit) may
     * be used interchangeably.
     *
     * On the datapath, it is expected that the size of packets
     * (and thus 'v') will always be <= UINT16_MAX; this means that there is no
     * loss of accuracy in assigning 'v' to 'data_len'.
     */
    ovs_assert(v <= UINT16_MAX);
    b->mbuf.data_len = (uint16_t)v; /* Current seg length. */
    b->mbuf.pkt_len = v;            /* Total length of all segments linked to
                                     * this segment. */
}
/* Returns the offset of the packet data from the buffer base. */
static inline uint16_t
__packet_data(const struct dp_packet *b)
{
    return b->mbuf.data_off;
}
/* Sets the offset of the packet data from the buffer base. */
static inline void
__packet_set_data(struct dp_packet *b, uint16_t v)
{
    b->mbuf.data_off = v;
}
/* Returns the number of bytes allocated for the buffer of 'b'. */
static inline uint16_t
dp_packet_get_allocated(const struct dp_packet *b)
{
    return b->mbuf.buf_len;
}
/* Records 's' as the allocated buffer size of 'b'. */
static inline void
dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
{
    b->mbuf.buf_len = s;
}
/* Returns the TSO maximum segment size recorded in 'p'. */
static inline uint16_t
dp_packet_get_tso_segsz(const struct dp_packet *p)
{
    return p->mbuf.tso_segsz;
}
/* Records 's' as the TSO maximum segment size of 'p'. */
static inline void
dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
{
    p->mbuf.tso_segsz = s;
}
#else /* DPDK_NETDEV */
static inline void
dp_packet_init_specific(struct dp_packet *p OVS_UNUSED)
{
    /* There are no implementation-specific fields for initialization. */
}
/* Returns the start of the buffer backing 'b'. */
static inline void *
dp_packet_base(const struct dp_packet *b)
{
    return b->base_;
}
/* Points the buffer of 'b' at 'd'. */
static inline void
dp_packet_set_base(struct dp_packet *b, void *d)
{
    b->base_ = d;
}
/* Returns the total number of bytes of packet data in 'b'. */
static inline uint32_t
dp_packet_size(const struct dp_packet *b)
{
    return b->size_;
}
/* Sets the packet data size of 'b' to 'v' bytes. */
static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
    b->size_ = v;
}
/* Returns the offset of the packet data from the buffer base. */
static inline uint16_t
__packet_data(const struct dp_packet *b)
{
    return b->data_ofs;
}
/* Sets the offset of the packet data from the buffer base. */
static inline void
__packet_set_data(struct dp_packet *b, uint16_t v)
{
    b->data_ofs = v;
}
/* Returns the number of bytes allocated for the buffer of 'b'. */
static inline uint16_t
dp_packet_get_allocated(const struct dp_packet *b)
{
    return b->allocated_;
}
/* Records 's' as the allocated buffer size of 'b'. */
static inline void
dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
{
    b->allocated_ = s;
}
/* Returns the TSO maximum segment size recorded in 'p'. */
static inline uint16_t
dp_packet_get_tso_segsz(const struct dp_packet *p)
{
    return p->tso_segsz;
}
/* Records 's' as the TSO maximum segment size of 'p'. */
static inline void
dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
{
    p->tso_segsz = s;
}
#endif /* DPDK_NETDEV */
/* Clears any pending truncation on 'b'. */
static inline void
dp_packet_reset_cutlen(struct dp_packet *b)
{
    b->cutlen = 0;
}
/* Arranges for 'b' to be truncated to at most 'max_len' bytes on transmit
 * by recording how many trailing bytes should be cut.  The effective
 * minimum length is ETH_HEADER_LEN.  Returns the number of bytes that will
 * be cut. */
static inline uint32_t
dp_packet_set_cutlen(struct dp_packet *b, uint32_t max_len)
{
    uint32_t size = dp_packet_size(b);

    if (max_len < ETH_HEADER_LEN) {
        max_len = ETH_HEADER_LEN;
    }
    b->cutlen = size > max_len ? size - max_len : 0;
    return b->cutlen;
}
/* Returns the number of bytes pending truncation on 'b'. */
static inline uint32_t
dp_packet_get_cutlen(const struct dp_packet *b)
{
    /* Always in valid range if user uses dp_packet_set_cutlen. */
    return b->cutlen;
}
/* Returns the number of bytes that will actually be sent: the packet size
 * minus any pending truncation. */
static inline uint32_t
dp_packet_get_send_len(const struct dp_packet *b)
{
    return dp_packet_size(b) - dp_packet_get_cutlen(b);
}
/* Returns a pointer to the first byte of packet data, or NULL if the data
 * offset is unset (UINT16_MAX). */
static inline void *
dp_packet_data(const struct dp_packet *b)
{
    return __packet_data(b) != UINT16_MAX
           ? (char *) dp_packet_base(b) + __packet_data(b) : NULL;
}
/* Points the packet data of 'b' at 'data', which must lie within the
 * buffer of 'b', or pass NULL to mark the data offset as unset. */
static inline void
dp_packet_set_data(struct dp_packet *b, void *data)
{
    if (data) {
        __packet_set_data(b, (char *) data - (char *) dp_packet_base(b));
    } else {
        __packet_set_data(b, UINT16_MAX);
    }
}
enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
/* A fixed-capacity array of packets processed together by the datapath. */
struct dp_packet_batch {
    size_t count;               /* Number of valid entries in 'packets'. */
    bool trunc;                 /* true if the batch needs truncate. */
    struct dp_packet *packets[NETDEV_MAX_BURST];
};
/* Initializes 'batch' as empty with no pending truncation. */
static inline void
dp_packet_batch_init(struct dp_packet_batch *batch)
{
    batch->count = 0;
    batch->trunc = false;
}
/* Appends 'packet' to 'batch' if fewer than 'limit' packets are present;
 * otherwise frees 'packet'. */
static inline void
dp_packet_batch_add__(struct dp_packet_batch *batch,
                      struct dp_packet *packet, size_t limit)
{
    if (batch->count < limit) {
        batch->packets[batch->count++] = packet;
    } else {
        dp_packet_delete(packet);
    }
}
/* When the batch is full, 'packet' will be dropped and freed. */
static inline void
dp_packet_batch_add(struct dp_packet_batch *batch, struct dp_packet *packet)
{
    dp_packet_batch_add__(batch, packet, NETDEV_MAX_BURST);
}
/* Returns the number of packets currently in 'batch'. */
static inline size_t
dp_packet_batch_size(const struct dp_packet_batch *batch)
{
    return batch->count;
}
/* Clear 'batch' for refill.  Use dp_packet_batch_refill() to add
 * packets back into the 'batch'.  Unlike dp_packet_batch_init(), the
 * 'trunc' flag is intentionally left alone. */
static inline void
dp_packet_batch_refill_init(struct dp_packet_batch *batch)
{
    batch->count = 0;
}
/* Re-adds 'packet' during a refill iteration; 'idx' is the index the packet
 * had in the original batch, which bounds the refilled position so packets
 * never move forward past their original slot. */
static inline void
dp_packet_batch_refill(struct dp_packet_batch *batch,
                       struct dp_packet *packet, size_t idx)
{
    dp_packet_batch_add__(batch, packet, MIN(NETDEV_MAX_BURST, idx + 1));
}
/* Initializes 'batch' to contain only the single packet 'p'. */
static inline void
dp_packet_batch_init_packet(struct dp_packet_batch *batch, struct dp_packet *p)
{
    dp_packet_batch_init(batch);
    batch->count = 1;
    batch->packets[0] = p;
}
/* Returns true if 'batch' holds no packets. */
static inline bool
dp_packet_batch_is_empty(const struct dp_packet_batch *batch)
{
    return !dp_packet_batch_size(batch);
}
/* Returns true if 'batch' is at capacity (NETDEV_MAX_BURST packets). */
static inline bool
dp_packet_batch_is_full(const struct dp_packet_batch *batch)
{
    return dp_packet_batch_size(batch) == NETDEV_MAX_BURST;
}
/* Iterates over every packet in 'BATCH', declaring 'IDX' as the loop index
 * and assigning each packet in turn to 'PACKET'. */
#define DP_PACKET_BATCH_FOR_EACH(IDX, PACKET, BATCH)                \
    for (size_t IDX = 0; IDX < dp_packet_batch_size(BATCH); IDX++)  \
        if (PACKET = (BATCH)->packets[IDX], true)
/* Use this macro for cases where some packets in the 'BATCH' may be
 * dropped after going through each packet in the 'BATCH'.
 *
 * For packets to stay in the 'BATCH', they need to be refilled back
 * into the 'BATCH' by calling dp_packet_batch_refill(). Caller owns
 * the packets that are not refilled.
 *
 * Caller needs to supply 'SIZE', that stores the current number of
 * packets in 'BATCH'. It is best to declare this variable with
 * the 'const' modifier since it should not be modified by
 * the iterator. */
#define DP_PACKET_BATCH_REFILL_FOR_EACH(IDX, SIZE, PACKET, BATCH)        \
    for (dp_packet_batch_refill_init(BATCH), IDX = 0; IDX < SIZE; IDX++) \
        if (PACKET = (BATCH)->packets[IDX], true)
/* Deep-copies 'src' into 'dst': each packet is cloned preserving its
 * headroom, and the truncation flag is carried over. */
static inline void
dp_packet_batch_clone(struct dp_packet_batch *dst,
                      struct dp_packet_batch *src)
{
    struct dp_packet *packet;
    dp_packet_batch_init(dst);
    DP_PACKET_BATCH_FOR_EACH (i, packet, src) {
        if (i + 1 < dp_packet_batch_size(src)) {
            /* Prefetch the next packet while cloning the current one. */
            OVS_PREFETCH(src->packets[i + 1]);
        }
        uint32_t headroom = dp_packet_headroom(packet);
        struct dp_packet *pkt_clone;
        pkt_clone = dp_packet_clone_with_headroom(packet, headroom);
        dp_packet_batch_add(dst, pkt_clone);
    }
    dst->trunc = src->trunc;
}
/* Frees every packet in 'batch' and empties it, but only when the caller
 * owns the packets ('should_steal'); otherwise does nothing. */
static inline void
dp_packet_delete_batch(struct dp_packet_batch *batch, bool should_steal)
{
    if (should_steal) {
        struct dp_packet *packet;
        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
            dp_packet_delete(packet);
        }
        dp_packet_batch_init(batch);
    }
}
/* Applies pending truncation to each packet in 'batch' by shrinking its
 * size to the send length, then clears all truncation state. */
static inline void
dp_packet_batch_apply_cutlen(struct dp_packet_batch *batch)
{
    if (batch->trunc) {
        struct dp_packet *packet;
        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
            dp_packet_set_size(packet, dp_packet_get_send_len(packet));
            dp_packet_reset_cutlen(packet);
        }
        batch->trunc = false;
    }
}
/* Discards pending truncation for each packet in 'batch' without changing
 * any packet's size. */
static inline void
dp_packet_batch_reset_cutlen(struct dp_packet_batch *batch)
{
    if (batch->trunc) {
        struct dp_packet *packet;
        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
            dp_packet_reset_cutlen(packet);
        }
        batch->trunc = false;
    }
}
/* Returns the RSS hash of the packet 'p'.  Note that the returned value is
 * correct only if 'dp_packet_rss_valid(p)' returns 'true'. */
static inline uint32_t
dp_packet_get_rss_hash(const struct dp_packet *p)
{
    return *dp_packet_rss_ptr(p);
}
/* Stores 'hash' as the RSS hash of 'p' and marks it as valid. */
static inline void
dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash)
{
    *dp_packet_rss_ptr(p) = hash;
    p->has_hash = true;
}
/* Returns true if 'p' carries a valid RSS hash. */
static inline bool
dp_packet_rss_valid(const struct dp_packet *p)
{
    return p->has_hash;
}
/* Clears all offload state of 'p': the supported offload flags, the RSS
 * hash and flow mark validity, and the 'offloads' bits. */
static inline void
dp_packet_reset_offload(struct dp_packet *p)
{
    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_SUPPORTED_MASK;
    p->has_hash = p->has_mark = false;
    p->offloads = 0;
}
/* If 'p' carries a flow mark, stores it in '*mark' and returns true;
 * otherwise returns false and leaves '*mark' untouched. */
static inline bool
dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark)
{
    if (p->has_mark) {
        *mark = *dp_packet_flow_mark_ptr(p);
        return true;
    }
    return false;
}
/* Stores 'mark' as the flow mark of 'p' and marks it as valid. */
static inline void
dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
{
    *dp_packet_flow_mark_ptr(p) = mark;
    p->has_mark = true;
}
/* Tunnel type encoding in 'offloads': GENEVE and VXLAN each use a single
 * bit, while GRE is represented by both bits set (the full
 * DP_PACKET_OL_TUNNEL_MASK).  Hence the single-bit setters clear the other
 * bit first. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_tunnel_geneve(const struct dp_packet *b)
{
    return (b->offloads & DP_PACKET_OL_TUNNEL_MASK)
            == DP_PACKET_OL_TUNNEL_GENEVE;
}
/* Marks 'b' as a Geneve-encapsulated packet. */
static inline void
dp_packet_tunnel_set_geneve(struct dp_packet *b)
{
    b->offloads &= ~DP_PACKET_OL_TUNNEL_VXLAN;
    b->offloads |= DP_PACKET_OL_TUNNEL_GENEVE;
}
/* Returns true if 'b' is marked as a VXLAN-encapsulated packet. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_tunnel_vxlan(const struct dp_packet *b)
{
    return (b->offloads & DP_PACKET_OL_TUNNEL_MASK)
            == DP_PACKET_OL_TUNNEL_VXLAN;
}
/* Marks 'b' as a VXLAN-encapsulated packet. */
static inline void
dp_packet_tunnel_set_vxlan(struct dp_packet *b)
{
    b->offloads &= ~DP_PACKET_OL_TUNNEL_GENEVE;
    b->offloads |= DP_PACKET_OL_TUNNEL_VXLAN;
}
/* Returns true if 'b' is marked as a GRE-encapsulated packet (both tunnel
 * bits set). */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_tunnel_gre(const struct dp_packet *b)
{
    return (b->offloads & DP_PACKET_OL_TUNNEL_MASK)
            == DP_PACKET_OL_TUNNEL_MASK;
}
/* Marks 'b' as a GRE-encapsulated packet. */
static inline void
dp_packet_tunnel_set_gre(struct dp_packet *b)
{
    b->offloads |= DP_PACKET_OL_TUNNEL_MASK;
}
/* Returns true if 'b' is marked as encapsulated in any tunnel type. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_tunnel(const struct dp_packet *b)
{
    return !!(b->offloads & DP_PACKET_OL_TUNNEL_MASK);
}
/* Marks packet 'p' with good IPv4 checksum. */
static inline void
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
dp_packet_ip_checksum_set_good(struct dp_packet *p)
{
p->offloads &= ~DP_PACKET_OL_IP_CKSUM_BAD;
p->offloads |= DP_PACKET_OL_IP_CKSUM_GOOD;
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
/* Returns true if 'p' is explicitly marked as having a bad IPv4
 * checksum. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_ip_checksum_bad(const struct dp_packet *p)
{
    return (p->offloads & DP_PACKET_OL_IP_CKSUM_MASK)
            == DP_PACKET_OL_IP_CKSUM_BAD;
}
static inline void
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
dp_packet_ip_checksum_set_bad(struct dp_packet *p)
{
p->offloads &= ~DP_PACKET_OL_IP_CKSUM_GOOD;
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
p->offloads |= DP_PACKET_OL_IP_CKSUM_BAD;
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
/* Returns 'true' if the IPv4 header has good integrity but the
* checksum in it is incomplete. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_ip_checksum_partial(const struct dp_packet *p)
{
return (p->offloads & DP_PACKET_OL_IP_CKSUM_MASK)
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
== DP_PACKET_OL_IP_CKSUM_MASK;
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
/* Marks packet 'p' as having a valid IPv4 header, but no checksum. */
static inline void
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
dp_packet_ip_checksum_set_partial(struct dp_packet *p)
{
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
p->offloads |= DP_PACKET_OL_IP_CKSUM_MASK;
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_ip_checksum_unknown(const struct dp_packet *p)
{
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
return !(p->offloads & DP_PACKET_OL_IP_CKSUM_MASK);
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
static inline void
dp_packet_ip_checksum_set_unknown(struct dp_packet *p)
{
p->offloads &= ~DP_PACKET_OL_IP_CKSUM_MASK;
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_ip_checksum_valid(const struct dp_packet *p)
{
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
return !!(p->offloads & DP_PACKET_OL_IP_CKSUM_GOOD);
}
dp-packet: Rework IP checksum offloads. As the packet traverses through OVS, offloading Tx flags must be carefully evaluated and updated which results in a bit of complexity because of a separate "outer" Tx offloading flag coming from DPDK API, and a "normal"/"inner" Tx offloading flag. On the other hand, the DPDK mbuf API specifies 4 status when it comes to IP checksums: - RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - RTE_MBUF_F_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - RTE_MBUF_F_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - RTE_MBUF_F_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet data, but the integrity of the IP header is verified. This patch changes OVS API so that OVS code only tracks the status of the checksum of the "current" L3 header and let the Tx flags aspect to the netdev-* implementations. With this API, the flow extraction can be cleaned up. During packet processing, OVS can simply look for the IP checksum validity (either good, or partial) before changing some IP header, and then mark the checksum as partial. In the conntrack case, when natting packets, the checksum status of the inner part (ICMP error case) must be forced temporarily as unknown to force checksum resolution. When tunneling comes into play, IP checksums status is bit-shifted for future considerations in the processing if, for example, the tunnel header gets decapsulated again, or in the netdev-* implementations that support tunnel offloading. Finally, netdev-* implementations only need to care about packets in partial status: a good checksum does not need touching, a bad checksum has been updated by kept as bad by OVS, an unknown checksum is either an IPv6 or if it was an IPv4, OVS updated it too (keeping it good or bad accordingly). Rename current API for consistency with dp_packet_(inner_)?ip_checksum_. Signed-off-by: David Marchand <david.marchand@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2025-06-17 09:20:57 +02:00
/* Marks packet 'p' with good inner IPv4 checksum. */
static inline void
dp_packet_inner_ip_checksum_set_good(struct dp_packet *p)
{
p->offloads &= ~DP_PACKET_OL_INNER_IP_CKSUM_BAD;
p->offloads |= DP_PACKET_OL_INNER_IP_CKSUM_GOOD;
}
/* Returns 'true' if the inner IPv4 header has good integrity but the
 * checksum in it is incomplete. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_ip_checksum_partial(const struct dp_packet *p)
{
    /* "Partial" is encoded as both status bits of the mask being set. */
    bool partial = (p->offloads & DP_PACKET_OL_INNER_IP_CKSUM_MASK)
                   == DP_PACKET_OL_INNER_IP_CKSUM_MASK;

    return partial;
}
/* Marks packet 'p' as having a valid inner IPv4 header, but no checksum. */
static inline void
dp_packet_inner_ip_checksum_set_partial(struct dp_packet *p)
{
    /* Setting both status bits at once encodes the "partial" state. */
    p->offloads = p->offloads | DP_PACKET_OL_INNER_IP_CKSUM_MASK;
}
/* Calculates and stores the IPv4 header checksum in packet 'p' (the
 * outer L3 header, or the inner one if 'inner' is true), then marks
 * that checksum status as good. */
static inline void
dp_packet_ip_set_header_csum(struct dp_packet *p, bool inner)
{
    struct ip_header *ip;
    size_t l3_size;
    size_t ip_len;

    if (!inner) {
        ip = dp_packet_l3(p);
        l3_size = dp_packet_l3_size(p);
    } else {
        ip = dp_packet_inner_l3(p);
        l3_size = dp_packet_inner_l3_size(p);
    }
    ovs_assert(ip);

    /* Header length in bytes, as declared by the IHL field. */
    ip_len = IP_IHL(ip->ip_ihl_ver) * 4;
    /* Only recompute over a sane header length that fits within the
     * available L3 data. */
    if (OVS_LIKELY(ip_len >= IP_HEADER_LEN && ip_len < l3_size)) {
        ip->ip_csum = 0;
        ip->ip_csum = csum(ip, ip_len);
    }

    if (!inner) {
        dp_packet_ip_checksum_set_good(p);
    } else {
        dp_packet_inner_ip_checksum_set_good(p);
    }
}
/* Returns 'true' if the L4 protocol recorded for 'b' is TCP. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_proto_tcp(const struct dp_packet *b)
{
    bool is_tcp = (b->offloads & DP_PACKET_OL_L4_PROTO_MASK)
                  == DP_PACKET_OL_L4_PROTO_TCP;

    return is_tcp;
}
/* Records TCP as the L4 protocol of 'b'. */
static inline void
dp_packet_l4_proto_set_tcp(struct dp_packet *b)
{
    b->offloads = (b->offloads & ~DP_PACKET_OL_L4_PROTO_UDP)
                  | DP_PACKET_OL_L4_PROTO_TCP;
}
/* Returns 'true' if the L4 protocol recorded for 'b' is UDP. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_proto_udp(const struct dp_packet *b)
{
    bool is_udp = (b->offloads & DP_PACKET_OL_L4_PROTO_MASK)
                  == DP_PACKET_OL_L4_PROTO_UDP;

    return is_udp;
}
/* Records UDP as the L4 protocol of 'b'. */
static inline void
dp_packet_l4_proto_set_udp(struct dp_packet *b)
{
    b->offloads = (b->offloads & ~DP_PACKET_OL_L4_PROTO_TCP)
                  | DP_PACKET_OL_L4_PROTO_UDP;
}
/* Returns 'true' if the L4 protocol recorded for 'b' is SCTP.
 * SCTP is encoded as both protocol bits of the mask being set. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_proto_sctp(const struct dp_packet *b)
{
    bool is_sctp = (b->offloads & DP_PACKET_OL_L4_PROTO_MASK)
                   == DP_PACKET_OL_L4_PROTO_MASK;

    return is_sctp;
}
/* Records SCTP as the L4 protocol of 'b' by setting both protocol bits. */
static inline void
dp_packet_l4_proto_set_sctp(struct dp_packet *b)
{
    b->offloads = b->offloads | DP_PACKET_OL_L4_PROTO_MASK;
}
/* Returns 'true' if the packet 'p' has good integrity and the
 * checksum in it is correct. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_checksum_good(const struct dp_packet *p)
{
    bool good = (p->offloads & DP_PACKET_OL_L4_CKSUM_MASK)
                == DP_PACKET_OL_L4_CKSUM_GOOD;

    return good;
}
/* Marks packet 'p' with good L4 checksum. */
static inline void
dp_packet_l4_checksum_set_good(struct dp_packet *p)
{
    /* Clear BAD and set GOOD in a single read-modify-write. */
    p->offloads = (p->offloads & ~DP_PACKET_OL_L4_CKSUM_BAD)
                  | DP_PACKET_OL_L4_CKSUM_GOOD;
}
/* Returns 'true' if the L4 checksum of 'p' is known to be incorrect. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_checksum_bad(const struct dp_packet *p)
{
    bool bad = (p->offloads & DP_PACKET_OL_L4_CKSUM_MASK)
               == DP_PACKET_OL_L4_CKSUM_BAD;

    return bad;
}
/* Marks packet 'p' with bad L4 checksum. */
static inline void
dp_packet_l4_checksum_set_bad(struct dp_packet *p)
{
    /* Clear GOOD and set BAD in a single read-modify-write. */
    p->offloads = (p->offloads & ~DP_PACKET_OL_L4_CKSUM_GOOD)
                  | DP_PACKET_OL_L4_CKSUM_BAD;
}
/* Returns 'true' if the packet has good integrity though the
 * checksum in the packet 'p' is not complete. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_checksum_partial(const struct dp_packet *p)
{
    /* "Partial" is encoded as both status bits of the mask being set. */
    bool partial = (p->offloads & DP_PACKET_OL_L4_CKSUM_MASK)
                   == DP_PACKET_OL_L4_CKSUM_MASK;

    return partial;
}
/* Marks packet 'p' with good integrity though the checksum in the
 * packet is not complete. */
static inline void
dp_packet_l4_checksum_set_partial(struct dp_packet *p)
{
    p->offloads = p->offloads | DP_PACKET_OL_L4_CKSUM_MASK;
}
/* Returns 'true' if nothing is known about the L4 checksum of 'p'. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_checksum_unknown(const struct dp_packet *p)
{
    return (p->offloads & DP_PACKET_OL_L4_CKSUM_MASK) == 0;
}
/* Clears any recorded L4 checksum status from packet 'p'. */
static inline void
dp_packet_l4_checksum_set_unknown(struct dp_packet *p)
{
    p->offloads = p->offloads & ~DP_PACKET_OL_L4_CKSUM_MASK;
}
/* Returns 'true' if the GOOD status bit is set for the L4 checksum of
 * 'p'; this covers both the "good" and "partial" states. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_l4_checksum_valid(const struct dp_packet *p)
{
    return (p->offloads & DP_PACKET_OL_L4_CKSUM_GOOD) != 0;
}
/* Returns 'true' if the inner L4 protocol recorded for 'p' is TCP. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_l4_proto_tcp(const struct dp_packet *p)
{
    bool is_tcp = (p->offloads & DP_PACKET_OL_INNER_L4_PROTO_MASK)
                  == DP_PACKET_OL_INNER_L4_PROTO_TCP;

    return is_tcp;
}
/* Returns 'true' if the inner L4 protocol recorded for 'p' is UDP. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_l4_proto_udp(const struct dp_packet *p)
{
    bool is_udp = (p->offloads & DP_PACKET_OL_INNER_L4_PROTO_MASK)
                  == DP_PACKET_OL_INNER_L4_PROTO_UDP;

    return is_udp;
}
/* Returns 'true' if the inner L4 protocol recorded for 'p' is SCTP.
 * SCTP is encoded as both protocol bits of the mask being set. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_l4_proto_sctp(const struct dp_packet *p)
{
    bool is_sctp = (p->offloads & DP_PACKET_OL_INNER_L4_PROTO_MASK)
                   == DP_PACKET_OL_INNER_L4_PROTO_MASK;

    return is_sctp;
}
/* Returns 'true' if the inner L4 header has good integrity and the
 * checksum in it is complete. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_l4_checksum_good(const struct dp_packet *p)
{
    bool good = (p->offloads & DP_PACKET_OL_INNER_L4_CKSUM_MASK)
                == DP_PACKET_OL_INNER_L4_CKSUM_GOOD;

    return good;
}
/* Marks packet 'p' with good inner L4 checksum.  (Clears the BAD bit
 * and sets the GOOD bit; not the "partial" state.) */
static inline void
dp_packet_inner_l4_checksum_set_good(struct dp_packet *p)
{
    p->offloads &= ~DP_PACKET_OL_INNER_L4_CKSUM_BAD;
    p->offloads |= DP_PACKET_OL_INNER_L4_CKSUM_GOOD;
}
/* Returns 'true' if the inner L4 header has good integrity but the
 * checksum in it is incomplete. */
static inline bool OVS_WARN_UNUSED_RESULT
dp_packet_inner_l4_checksum_partial(const struct dp_packet *p)
{
    /* "Partial" is encoded as both status bits of the mask being set. */
    bool partial = (p->offloads & DP_PACKET_OL_INNER_L4_CKSUM_MASK)
                   == DP_PACKET_OL_INNER_L4_CKSUM_MASK;

    return partial;
}
/* Marks packet 'p' as having a valid inner l4 header, but no checksum. */
static inline void
dp_packet_inner_l4_checksum_set_partial(struct dp_packet *p)
{
    /* Setting both status bits at once encodes the "partial" state. */
    p->offloads = p->offloads | DP_PACKET_OL_INNER_L4_CKSUM_MASK;
}
/* Strips 'off' bytes from the front of packet 'b': advances the data
 * pointer, shrinks the size to match, and invalidates all cached
 * layer offsets. */
static inline void
dp_packet_reset_packet(struct dp_packet *b, int off)
{
    unsigned char *new_data = (unsigned char *) dp_packet_data(b) + off;

    dp_packet_set_size(b, dp_packet_size(b) - off);
    dp_packet_set_data(b, new_data);
    dp_packet_reset_offsets(b);
}
/* Folds the IPv4 source address, destination address, and protocol of
 * the header at 'l3_ofs' within 'pkt' into 'hash' and returns the
 * updated value. */
static inline uint32_t ALWAYS_INLINE
dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
                         uint32_t hash)
{
    uint32_t src_addr, dst_addr;

    /* memcpy avoids unaligned reads from the packet buffer. */
    memcpy(&src_addr, &pkt[l3_ofs + offsetof(struct ip_header, ip_src)],
           sizeof src_addr);
    memcpy(&dst_addr, &pkt[l3_ofs + offsetof(struct ip_header, ip_dst)],
           sizeof dst_addr);

    /* IPv4 Src and Dst. */
    hash = hash_add(hash, src_addr);
    hash = hash_add(hash, dst_addr);

    /* IPv4 proto. */
    hash = hash_add(hash, pkt[l3_ofs + offsetof(struct ip_header, ip_proto)]);

    return hash;
}
/* Computes and caches an RSS hash for IPv4 packet 'packet' over its
 * addresses and protocol, unless a valid hash is already present. */
static inline void ALWAYS_INLINE
dp_packet_update_rss_hash_ipv4(struct dp_packet *packet)
{
    if (dp_packet_rss_valid(packet)) {
        return;
    }

    const uint8_t *data = dp_packet_data(packet);
    uint32_t hash;

    /* IPv4 Src, Dst and proto. */
    hash = dp_packet_calc_hash_ipv4(data, packet->l3_ofs, 0);

    dp_packet_set_rss_hash(packet, hash_finish(hash, 42));
}
/* Computes and caches an RSS hash for IPv4 TCP/UDP packet 'packet'
 * over its addresses, protocol and L4 ports, unless a valid hash is
 * already present. */
static inline void ALWAYS_INLINE
dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet)
{
    if (dp_packet_rss_valid(packet)) {
        return;
    }

    const uint8_t *data = dp_packet_data(packet);
    uint32_t port_pair;
    uint32_t hash;

    /* IPv4 Src, Dst and proto. */
    hash = dp_packet_calc_hash_ipv4(data, packet->l3_ofs, 0);

    /* L4 ports: src and dst are adjacent, read as one 32-bit chunk;
     * memcpy avoids unaligned access. */
    memcpy(&port_pair, &data[packet->l4_ofs], sizeof port_pair);
    hash = hash_add(hash, port_pair);

    dp_packet_set_rss_hash(packet, hash_finish(hash, 42));
}
/* Computes and caches an RSS hash for IPv6 TCP/UDP packet 'packet'
 * over its addresses, next-header and L4 ports, unless a valid hash
 * is already present. */
static inline void ALWAYS_INLINE
dp_packet_update_rss_hash_ipv6_tcp_udp(struct dp_packet *packet)
{
    if (dp_packet_rss_valid(packet)) {
        return;
    }

    const uint8_t *data = dp_packet_data(packet);
    const uint16_t l3_ofs = packet->l3_ofs;
    uint32_t src_ofs = offsetof(struct ovs_16aligned_ip6_hdr, ip6_src);
    uint32_t dst_ofs = offsetof(struct ovs_16aligned_ip6_hdr, ip6_dst);
    uint32_t nxt_ofs = offsetof(struct ovs_16aligned_ip6_hdr,
                                ip6_ctlun.ip6_un1.ip6_un1_nxt);
    uint64_t src_lo, src_hi;
    uint64_t dst_lo, dst_hi;
    uint32_t port_pair;
    uint32_t hash = 0;

    /* Read each 128-bit address in two 64-bit halves; memcpy avoids
     * unaligned loads from the packet buffer. */
    memcpy(&src_lo, &data[l3_ofs + src_ofs], sizeof src_lo);
    memcpy(&src_hi, &data[l3_ofs + src_ofs + 8], sizeof src_hi);
    memcpy(&dst_lo, &data[l3_ofs + dst_ofs], sizeof dst_lo);
    memcpy(&dst_hi, &data[l3_ofs + dst_ofs + 8], sizeof dst_hi);
    memcpy(&port_pair, &data[packet->l4_ofs], sizeof port_pair);

    /* IPv6 Src and Dst. */
    hash = hash_add64(hash, src_lo);
    hash = hash_add64(hash, src_hi);
    hash = hash_add64(hash, dst_lo);
    hash = hash_add64(hash, dst_hi);

    /* IPv6 proto (next header). */
    hash = hash_add(hash, data[l3_ofs + nxt_ofs]);

    /* L4 ports. */
    hash = hash_add(hash, port_pair);

    dp_packet_set_rss_hash(packet, hash_finish(hash, 42));
}
#ifdef __cplusplus
}
#endif
#endif /* dp-packet.h */