ovs/lib/flow.h

/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef FLOW_H
#define FLOW_H 1

#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "bitmap.h"
#include "byte-order.h"
#include "openvswitch/compiler.h"
#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
#include "openvswitch/flow.h"
#include "packets.h"
#include "hash.h"
#include "util.h"

struct dpif_flow_stats;
struct dpif_flow_attrs;
struct ds;
struct flow_wildcards;
struct minimask;
struct dp_packet;
struct ofputil_port_map;
struct pkt_metadata;
struct match;

/* FLOW_U64S reduced by the 64-bit units of fields that cannot all be present
 * for one packet.
 *
 * Some flow fields are mutually exclusive or only appear within the flow
 * pipeline.  IPv6 headers are bigger than IPv4 and MPLS, and IPv6 ND packets
 * are bigger than TCP, UDP and IGMP packets, so the units of the smaller
 * alternatives are subtracted below. */
#define FLOW_MAX_PACKET_U64S (FLOW_U64S                                   \
    /* Unused in datapath */  - FLOW_U64_SIZE(regs)                       \
                              - FLOW_U64_SIZE(metadata)                   \
    /* L2.5/3 */              - FLOW_U64_SIZE(nw_src)  /* incl. nw_dst */ \
                              - FLOW_U64_SIZE(mpls_lse)                   \
    /* L4 */                  - FLOW_U64_SIZE(tp_src)                     \
                             )

/* Array of per-segment values, defined elsewhere.
 * NOTE(review): presumably segment boundaries of struct flow expressed in
 * 64-bit units -- confirm against the definition. */
extern const uint8_t flow_segment_u64s[];

/* Configured maximum VLAN headers. */
extern int flow_vlan_limit;

/* Index of the 64-bit unit of 'struct flow' in which member 'FIELD' starts. */
#define FLOW_U64_OFFSET(FIELD)                          \
    (offsetof(struct flow, FIELD) / sizeof(uint64_t))
/* Byte offset of member 'FIELD' within the 64-bit unit in which it starts. */
#define FLOW_U64_OFFREM(FIELD)                          \
    (offsetof(struct flow, FIELD) % sizeof(uint64_t))

/* Number of 64-bit units spanned by a 'FIELD'.  The in-unit offset is added
 * before rounding up so that a member straddling a unit boundary counts both
 * units. */
#define FLOW_U64_SIZE(FIELD)                                            \
    DIV_ROUND_UP(FLOW_U64_OFFREM(FIELD) + MEMBER_SIZEOF(struct flow, FIELD), \
                 sizeof(uint64_t))

/* Flow extraction, wildcard and metadata helpers.  Declarations only; the
 * exact semantics are given with the definitions. */
void flow_extract(struct dp_packet *, struct flow *);

void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *);
void flow_get_metadata(const struct flow *, struct match *flow_metadata);
struct netdev *flow_get_tunnel_netdev(struct flow_tnl *tunnel);

/* Connection tracking state helpers: conversion between the 32-bit 'ct_state'
 * value and strings, plus parsing and validation that take a 'struct ds'
 * (NOTE(review): presumably for error text -- confirm in the definitions). */
const char *ct_state_to_string(uint32_t state);
uint32_t ct_state_from_string(const char *);
bool parse_ct_state(const char *state_str, uint32_t default_state,
                    uint32_t *ct_state, struct ds *);
bool validate_ct_state(uint32_t state, struct ds *);
void flow_clear_conntrack(struct flow *);

/* Formatting and parsing of flows and flag sets.  The 'bit_to_string'
 * callbacks take a uint32_t and return a string.
 * NOTE(review): flow_to_string() returns a non-const 'char *'; presumably the
 * caller owns and must free it -- confirm in the definition. */
char *flow_to_string(const struct flow *, const struct ofputil_port_map *);
void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
                  uint32_t flags, char del);
void format_flags_masked(struct ds *ds, const char *name,
                         const char *(*bit_to_string)(uint32_t),
                         uint32_t flags, uint32_t mask, uint32_t max_mask);
void format_packet_type_masked(struct ds *, ovs_be32 value, ovs_be32 mask);
int parse_flags(const char *s, const char *(*bit_to_string)(uint32_t),
                char end, const char *field_name, char **res_string,
                uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask);

void flow_format(struct ds *, const struct flow *,
                 const struct ofputil_port_map *);
void flow_print(FILE *, const struct flow *, const struct ofputil_port_map *);
/* Forward declarations of the inline comparison and hashing helpers that are
 * defined further down in this header. */
static inline int flow_compare_3way(const struct flow *, const struct flow *);
static inline bool flow_equal(const struct flow *, const struct flow *);
static inline size_t flow_hash(const struct flow *, uint32_t basis);

/* VLAN manipulation.  Declarations only. */
void flow_set_dl_vlan(struct flow *, ovs_be16 vid, int id);
void flow_fix_vlan_tpid(struct flow *);
void flow_set_vlan_vid(struct flow *, ovs_be16 vid);
void flow_set_vlan_pcp(struct flow *, uint8_t pcp, int id);

/* VLAN header counting and push/pop.
 * NOTE(review): flow_limit_vlans() presumably updates 'flow_vlan_limit' --
 * confirm in the definition. */
void flow_limit_vlans(int vlan_limit);
int flow_count_vlan_headers(const struct flow *);
void flow_skip_common_vlan_headers(const struct flow *a, int *p_an,
                                   const struct flow *b, int *p_bn);
void flow_pop_vlan(struct flow*, struct flow_wildcards*);
void flow_push_vlan_uninit(struct flow*, struct flow_wildcards*);

/* MPLS label stack manipulation.  The 'idx' parameters select an entry of the
 * label stack; the setters update one component (or the whole LSE) of it. */
int flow_count_mpls_labels(const struct flow *, struct flow_wildcards *);
int flow_count_common_mpls_labels(const struct flow *a, int an,
                                  const struct flow *b, int bn,
                                  struct flow_wildcards *wc);
void flow_push_mpls(struct flow *, int n, ovs_be16 mpls_eth_type,
                    struct flow_wildcards *, bool clear_flow_L3);
bool flow_pop_mpls(struct flow *, int n, ovs_be16 eth_type,
                   struct flow_wildcards *);
void flow_set_mpls_label(struct flow *, int idx, ovs_be32 label);
void flow_set_mpls_ttl(struct flow *, int idx, uint8_t ttl);
void flow_set_mpls_tc(struct flow *, int idx, uint8_t tc);
void flow_set_mpls_bos(struct flow *, int idx, uint8_t stack);
void flow_set_mpls_lse(struct flow *, int idx, ovs_be32 lse);

/* Packet composition from a flow.  'l7' and 'l7_len' describe a caller
 * supplied byte buffer.
 * NOTE(review): presumably used as the packet payload -- confirm in the
 * definition. */
void flow_compose(struct dp_packet *, const struct flow *,
                  const void *l7, size_t l7_len);
void packet_expand(struct dp_packet *, const struct flow *, size_t size);

/* Header parsing helpers.  '*datap' and '*sizep' are passed by pointer.
 * NOTE(review): presumably both are advanced past the parsed headers --
 * confirm in the definitions. */
bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                         uint8_t *nw_frag,
                         const struct ovs_16aligned_ip6_frag **frag_hdr);
bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
uint16_t parse_tcp_flags(struct dp_packet *packet);

/* Returns the 64-bit value formed by the register pair numbered 'idx' in
 * 'flow': regs[2 * idx] supplies the high 32 bits and regs[2 * idx + 1] the
 * low 32 bits.  'idx' is not range checked. */
static inline uint64_t
flow_get_xreg(const struct flow *flow, int idx)
{
    const uint64_t high = flow->regs[idx * 2];
    const uint64_t low = flow->regs[idx * 2 + 1];

    return (high << 32) | low;
}

/* Stores 'value' into the register pair numbered 'idx' in 'flow': the high
 * 32 bits go to regs[2 * idx], the low 32 bits to regs[2 * idx + 1].  'idx'
 * is not range checked. */
static inline void
flow_set_xreg(struct flow *flow, int idx, uint64_t value)
{
    const int first = idx * 2;

    flow->regs[first + 1] = value;
    flow->regs[first] = value >> 32;
}

/* Returns the 128-bit value formed by the four consecutive registers starting
 * at regs[4 * idx] in 'flow'.  The first two registers make up 'u64.hi' and
 * the last two 'u64.lo', most significant register first.  'idx' is not range
 * checked. */
static inline ovs_u128
flow_get_xxreg(const struct flow *flow, int idx)
{
    const int first = idx * 4;
    ovs_u128 result;

    result.u64.hi = ((uint64_t) flow->regs[first] << 32)
                    | flow->regs[first + 1];
    result.u64.lo = ((uint64_t) flow->regs[first + 2] << 32)
                    | flow->regs[first + 3];

    return result;
}

/* Stores the 128-bit 'value' into the four consecutive registers starting at
 * regs[4 * idx] in 'flow', most significant 32 bits first.  'idx' is not
 * range checked. */
static inline void
flow_set_xxreg(struct flow *flow, int idx, ovs_u128 value)
{
    const int first = idx * 4;
    const uint64_t high = value.u64.hi;
    const uint64_t low = value.u64.lo;

    flow->regs[first] = high >> 32;
    flow->regs[first + 1] = high;
    flow->regs[first + 2] = low >> 32;
    flow->regs[first + 3] = low;
}

static inline int
flow_compare_3way(const struct flow *a, const struct flow *b)
{
    return memcmp(a, b, sizeof *a);
}

/* Returns true if 'a' and 'b' are bytewise identical, as determined by
 * flow_compare_3way(). */
static inline bool
flow_equal(const struct flow *a, const struct flow *b)
{
    return flow_compare_3way(a, b) == 0;
}

static inline size_t
flow_hash(const struct flow *flow, uint32_t basis)
{
    return hash_bytes64((const uint64_t *)flow, sizeof *flow, basis);
}

/* Returns 'ofp_port' as a plain uint16_t, using an OVS_FORCE cast. */
static inline uint16_t
ofp_to_u16(ofp_port_t ofp_port)
{
    const uint16_t raw = (OVS_FORCE uint16_t) ofp_port;

    return raw;
}

/* Returns 'odp_port' as a plain uint32_t, using an OVS_FORCE cast. */
static inline uint32_t
odp_to_u32(odp_port_t odp_port)
{
    const uint32_t raw = (OVS_FORCE uint32_t) odp_port;

    return raw;
}

/* Returns 'ofp11_port' as a plain uint32_t, using an OVS_FORCE cast. */
static inline uint32_t
ofp11_to_u32(ofp11_port_t ofp11_port)
{
    const uint32_t raw = (OVS_FORCE uint32_t) ofp11_port;

    return raw;
}

/* Returns 'port' as an ofp_port_t, converted with OFP_PORT_C(). */
static inline ofp_port_t
u16_to_ofp(uint16_t port)
{
    const ofp_port_t ofp_port = OFP_PORT_C(port);

    return ofp_port;
}

/* Returns 'port' as an odp_port_t, converted with ODP_PORT_C(). */
static inline odp_port_t
u32_to_odp(uint32_t port)
{
    const odp_port_t odp_port = ODP_PORT_C(port);

    return odp_port;
}

/* Returns 'port' as an ofp11_port_t, converted with OFP11_PORT_C(). */
static inline ofp11_port_t
u32_to_ofp11(uint32_t port)
{
    const ofp11_port_t ofp11_port = OFP11_PORT_C(port);

    return ofp11_port;
}

/* Returns hash_int() of the numeric value of 'ofp_port', with 0 passed as the
 * second argument. */
static inline uint32_t
hash_ofp_port(ofp_port_t ofp_port)
{
    const uint16_t port_no = ofp_to_u16(ofp_port);

    return hash_int(port_no, 0);
}

/* Returns hash_int() of the numeric value of 'odp_port', with 0 passed as the
 * second argument. */
static inline uint32_t
hash_odp_port(odp_port_t odp_port)
{
    const uint32_t port_no = odp_to_u32(odp_port);

    return hash_int(port_no, 0);
}

/* Hash functions over selected parts of a flow.  Each takes the flow and a
 * 32-bit 'basis' and returns a 32-bit hash; which fields contribute is
 * defined with each function's implementation. */
uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis);
uint32_t flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis);
uint32_t flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis);
uint32_t flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
                         bool inc_udp_ports );
uint32_t flow_hash_symmetric_l3(const struct flow *flow, uint32_t basis);

/* Initialize a flow with random fields that matter for nx_hash_fields. */
void flow_random_hash_fields(struct flow *);
/* Helpers keyed by 'enum nx_hash_fields'.  Note that flow_hash_fields() takes
 * a 16-bit 'basis', unlike the 32-bit basis of the functions above. */
void flow_mask_hash_fields(const struct flow *, struct flow_wildcards *,
                           enum nx_hash_fields);
uint32_t flow_hash_fields(const struct flow *, enum nx_hash_fields,
                          uint16_t basis);
const char *flow_hash_fields_to_str(enum nx_hash_fields);
bool flow_hash_fields_valid(enum nx_hash_fields);

/* Hashing and comparison of flows relative to a set of wildcards. */
uint32_t flow_hash_in_wildcards(const struct flow *,
                                const struct flow_wildcards *,
                                uint32_t basis);

bool flow_equal_except(const struct flow *a, const struct flow *b,
                       const struct flow_wildcards *);

/* Bitmap for flow values.  For each 1-bit the corresponding flow value is
 * explicitly specified, other values are zeroes.
 *
 * map_t must be wide enough to hold any member of struct flow. */
typedef unsigned long long map_t;
/* Number of bits in a map_t. */
#define MAP_T_BITS (sizeof(map_t) * CHAR_BIT)
/* The value 1 with type map_t, for building bit masks by shifting. */
#define MAP_1 (map_t)1
/* TYPE_MAXIMUM() of map_t. */
#define MAP_MAX TYPE_MAXIMUM(map_t)

/* Nonzero if bit 'IDX' is set in 'MAP'.  'IDX' must be less than
 * MAP_T_BITS. */
#define MAP_IS_SET(MAP, IDX) ((MAP) & (MAP_1 << (IDX)))

/* Iterate through the indices of all 1-bits in 'MAP'. */
#define MAP_FOR_EACH_INDEX(IDX, MAP)            \
    ULLONG_FOR_EACH_1(IDX, MAP)

/* Number of map_t units needed to provide FLOW_U64S bits. */
#define FLOWMAP_UNITS DIV_ROUND_UP(FLOW_U64S, MAP_T_BITS)

/* A bitmap with FLOWMAP_UNITS * MAP_T_BITS bits, stored as an array of map_t
 * units.  Bit 'idx' lives in bits[idx / MAP_T_BITS] at bit position
 * 'idx % MAP_T_BITS'. */
struct flowmap {
    map_t bits[FLOWMAP_UNITS];
};

/* Initializer for a flowmap with no bits set. */
#define FLOWMAP_EMPTY_INITIALIZER { { 0 } }

/* Forward declarations of the flowmap inline helpers defined below, so that
 * the macros that follow can refer to them. */
static inline void flowmap_init(struct flowmap *);
static inline bool flowmap_equal(struct flowmap, struct flowmap);
static inline bool flowmap_is_set(const struct flowmap *, size_t idx);
static inline bool flowmap_are_set(const struct flowmap *, size_t idx,
                                   unsigned int n_bits);
static inline void flowmap_set(struct flowmap *, size_t idx,
                               unsigned int n_bits);
static inline void flowmap_clear(struct flowmap *, size_t idx,
                                 unsigned int n_bits);
static inline struct flowmap flowmap_or(struct flowmap, struct flowmap);
static inline struct flowmap flowmap_and(struct flowmap, struct flowmap);
static inline bool flowmap_is_empty(struct flowmap);
static inline unsigned int flowmap_n_1bits(struct flowmap);

/* True if any of the 64-bit units spanned by struct flow member 'FIELD' is
 * set in the flowmap pointed to by 'FM'. */
#define FLOWMAP_HAS_FIELD(FM, FIELD)                                    \
    flowmap_are_set(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD))

/* Sets the bits for all 64-bit units spanned by struct flow member 'FIELD'
 * in the flowmap pointed to by 'FM'. */
#define FLOWMAP_SET(FM, FIELD)                                      \
    flowmap_set(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD))

/* Like FLOWMAP_SET(), but the number of units is derived from the byte count
 * 'SIZE' (rounded up to whole 64-bit units) instead of the member's size.
 * The in-unit offset of 'FIELD' is not taken into account. */
#define FLOWMAP_SET__(FM, FIELD, SIZE)                  \
    flowmap_set(FM, FLOW_U64_OFFSET(FIELD),             \
                DIV_ROUND_UP(SIZE, sizeof(uint64_t)))

/* Clears the bits for all 64-bit units spanned by struct flow member 'FIELD'
 * in the flowmap pointed to by 'FM'.
 *
 * XXX: Only works for full 64-bit units: 'FIELD' must start on a 64-bit
 * boundary and its size must be a multiple of 64 bits.  Both conditions are
 * checked at build time.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a single
 * statement, e.g. as the body of an unbraced 'if' or 'else'. */
#define FLOWMAP_CLEAR(FM, FIELD)                                        \
    do {                                                                \
        BUILD_ASSERT_DECL(FLOW_U64_OFFREM(FIELD) == 0);                 \
        BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->FIELD)             \
                          % sizeof(uint64_t) == 0);                     \
        flowmap_clear(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD)); \
    } while (0)

/* Iterate 'UNIT' over all unit indices of a flowmap, that is, from 0 up to
 * but not including FLOWMAP_UNITS.  'UNIT' must be a variable declared by the
 * caller. */
#define FLOWMAP_FOR_EACH_UNIT(UNIT)                     \
    for ((UNIT) = 0; (UNIT) < FLOWMAP_UNITS; (UNIT)++)

/* Iterate through all map units in 'FLOWMAP', assigning each unit in turn to
 * the caller-declared variable 'MAP'.  'FLOWMAP' is a struct flowmap, not a
 * pointer.  The loop declares its own index variable 'unit__'. */
#define FLOWMAP_FOR_EACH_MAP(MAP, FLOWMAP)                              \
    for (size_t unit__ = 0;                                       \
         unit__ < FLOWMAP_UNITS && ((MAP) = (FLOWMAP).bits[unit__], true); \
         unit__++)

/* Iteration state for FLOWMAP_FOR_EACH_INDEX(); defined below. */
struct flowmap_aux;
static inline bool flowmap_next_index(struct flowmap_aux *, size_t *idx);

/* Initializer for a struct flowmap_aux that starts at unit 0 with a private
 * copy of 'FLOWMAP'. */
#define FLOWMAP_AUX_INITIALIZER(FLOWMAP) { .unit = 0, .map = (FLOWMAP) }

/* Iterate through all struct flow u64 indices specified by 'MAP'.  This is a
 * slower but easier version of the FLOWMAP_FOR_EACH_MAP() &
 * MAP_FOR_EACH_INDEX() combination.
 *
 * 'IDX' must be a caller-declared size_t variable, since its address is
 * passed to flowmap_next_index().  'MAP' is copied into the loop's own state,
 * so it is not modified by the iteration. */
#define FLOWMAP_FOR_EACH_INDEX(IDX, MAP)                            \
    for (struct flowmap_aux aux__ = FLOWMAP_AUX_INITIALIZER(MAP);   \
         flowmap_next_index(&aux__, &(IDX));)

/* Flowmap inline implementations. */
/* Clears every bit of 'fm' by zeroing the whole structure. */
static inline void
flowmap_init(struct flowmap *fm)
{
    memset(fm, 0, sizeof(struct flowmap));
}

static inline bool
flowmap_equal(struct flowmap a, struct flowmap b)
{
    return !memcmp(&a, &b, sizeof a);
}

static inline bool
flowmap_is_set(const struct flowmap *fm, size_t idx)
{
    return (fm->bits[idx / MAP_T_BITS] & (MAP_1 << (idx % MAP_T_BITS))) != 0;
}

/* Returns 'true' if any of the 'n_bits' bits starting at 'idx' are set in
 * 'fm'.  'n_bits' can be at most MAP_T_BITS. */
static inline bool
flowmap_are_set(const struct flowmap *fm, size_t idx, unsigned int n_bits)
{
    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
    size_t unit = idx / MAP_T_BITS;

    idx %= MAP_T_BITS;

    if (fm->bits[unit] & (n_bits_mask << idx)) {
        return true;
    }
    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
     * false-positive array out of bounds error by GCC 4.9. */
    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
        /* Check the remaining bits from the next unit. */
        return fm->bits[unit + 1] & (n_bits_mask >> (MAP_T_BITS - idx));
    }
    return false;
}

/* Set the 'n_bits' consecutive bits in 'fm', starting at bit 'idx'.
 * 'n_bits' can be at most MAP_T_BITS. */
static inline void
flowmap_set(struct flowmap *fm, size_t idx, unsigned int n_bits)
{
    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
    size_t unit = idx / MAP_T_BITS;

    idx %= MAP_T_BITS;

    fm->bits[unit] |= n_bits_mask << idx;
    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
     * false-positive array out of bounds error by GCC 4.9. */
    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
        /* 'MAP_T_BITS - idx' bits were set on 'unit', set the remaining
         * bits from the next unit. */
        fm->bits[unit + 1] |= n_bits_mask >> (MAP_T_BITS - idx);
    }
}

/* Clears the 'n_bits' consecutive bits in 'fm', starting at bit 'idx'.
 * 'n_bits' can be at most MAP_T_BITS. */
static inline void
flowmap_clear(struct flowmap *fm, size_t idx, unsigned int n_bits)
{
    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
    size_t unit = idx / MAP_T_BITS;

    idx %= MAP_T_BITS;

    fm->bits[unit] &= ~(n_bits_mask << idx);
    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
     * false-positive array out of bounds error by GCC 4.9. */
    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
        /* 'MAP_T_BITS - idx' bits were cleared on 'unit', clear the
         * remaining bits from the next unit. */
        fm->bits[unit + 1] &= ~(n_bits_mask >> (MAP_T_BITS - idx));
    }
}

/* Returns the unit-by-unit bitwise OR of flowmaps 'a' and 'b'.  Both
 * arguments are passed by value, so the caller's maps are not modified. */
static inline struct flowmap
flowmap_or(struct flowmap a, struct flowmap b)
{
    size_t i;

    /* 'a' is our own copy, so accumulate the result directly into it. */
    FLOWMAP_FOR_EACH_UNIT (i) {
        a.bits[i] |= b.bits[i];
    }
    return a;
}

/* Returns the unit-by-unit bitwise AND of flowmaps 'a' and 'b'.  Both
 * arguments are passed by value, so the caller's maps are not modified. */
static inline struct flowmap
flowmap_and(struct flowmap a, struct flowmap b)
{
    size_t i;

    /* 'a' is our own copy, so accumulate the result directly into it. */
    FLOWMAP_FOR_EACH_UNIT (i) {
        a.bits[i] &= b.bits[i];
    }
    return a;
}

/* Returns true if no bit is set in 'fm'. */
static inline bool
flowmap_is_empty(struct flowmap fm)
{
    map_t unit_bits;
    map_t any_bits = 0;

    /* Fold all units together; the map is empty iff the fold is zero. */
    FLOWMAP_FOR_EACH_MAP (unit_bits, fm) {
        any_bits |= unit_bits;
    }
    return any_bits == 0;
}

static inline unsigned int
flowmap_n_1bits(struct flowmap fm)
{
    unsigned int n_1bits = 0;
    size_t unit;

    FLOWMAP_FOR_EACH_UNIT (unit) {
        n_1bits += count_1bits(fm.bits[unit]);
    }
    return n_1bits;
}

/* Iteration state for flowmap_next_index(). */
struct flowmap_aux {
    size_t unit;            /* Index of the unit currently being scanned. */
    struct flowmap map;     /* Private copy of the map; 1-bits are cleared
                             * as they are reported. */
};

/* Finds the lowest remaining 1-bit in 'aux->map', at or after unit
 * 'aux->unit'.  If there is one, stores its overall bit index in '*idx',
 * clears it from 'aux->map' and returns true.  Otherwise returns false, with
 * 'aux->unit' advanced to FLOWMAP_UNITS and '*idx' left untouched.
 *
 * Must not be called again after it has returned false, since 'aux->unit'
 * then no longer indexes a valid unit. */
static inline bool
flowmap_next_index(struct flowmap_aux *aux, size_t *idx)
{
    do {
        map_t *unit_bits = &aux->map.bits[aux->unit];

        if (*unit_bits) {
            *idx = aux->unit * MAP_T_BITS + raw_ctz(*unit_bits);
            *unit_bits = zero_rightmost_1bit(*unit_bits);
            return true;
        }
    } while (++aux->unit < FLOWMAP_UNITS);

    return false;
}


/* Compressed flow. */

/* A sparse representation of a "struct flow".
 *
 * A "struct flow" is fairly large and tends to be mostly zeros.  Sparse
 * representation has two advantages.  First, it saves memory and, more
 * importantly, minimizes the number of accessed cache lines.  Second, it saves
 * time when the goal is to iterate over only the nonzero parts of the struct.
 *
 * The map member holds one bit for each uint64_t in a "struct flow".  Each
 * 0-bit indicates that the corresponding uint64_t is zero, each 1-bit that it
 * *may* be nonzero (see below how this applies to minimasks).
 *
 * The values indicated by 'map' always follow the miniflow in memory.  The
 * user of the miniflow is responsible for always having enough storage after
 * the struct miniflow corresponding to the number of 1-bits in maps.
 *
 * Elements in values array are allowed to be zero.  This is useful for "struct
 * minimatch", for which ensuring that the miniflow and minimask members have
 * same maps allows optimization.  This allowance applies only to a miniflow
 * that is not a mask.  That is, a minimask may NOT have zero elements in its
 * values.
 *
 * A miniflow is always dynamically allocated so that the maps are followed by
 * at least as many elements as there are 1-bits in maps. */
struct miniflow {
    struct flowmap map;
    /* Followed by:
     *     uint64_t values[n];
     * where 'n' is miniflow_n_values(miniflow). */
};
/* The values are accessed as uint64_t directly after the struct, so the
 * struct size must keep them 64-bit aligned. */
BUILD_ASSERT_DECL(sizeof(struct miniflow) % sizeof(uint64_t) == 0);

/* Number of bytes occupied by 'COUNT' miniflow values (uint64_t each). */
#define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint64_t))

/* Returns a pointer to the values of 'mf', which start at the first byte
 * after the struct miniflow itself. */
static inline uint64_t *miniflow_values(struct miniflow *mf)
{
    struct miniflow *past_header = mf + 1;

    return (uint64_t *) past_header;
}

/* Const variant of miniflow_values(): returns a pointer to the values of
 * 'mf', which start at the first byte after the struct miniflow itself. */
static inline const uint64_t *miniflow_get_values(const struct miniflow *mf)
{
    const struct miniflow *past_header = mf + 1;

    return (const uint64_t *) past_header;
}

struct pkt_metadata;

/* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units.
 * 'dst->map' is ignored on input and set on output to indicate which fields
 * were extracted. */
void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
/* Construction, cloning and expansion of miniflows.  Declarations only.
 * NOTE(review): storage and ownership requirements (for example, who
 * allocates the space following each struct miniflow, and who frees the
 * result of miniflow_create()) are not visible here -- confirm in the
 * definitions. */
void miniflow_map_init(struct miniflow *, const struct flow *);
void flow_wc_map(const struct flow *, struct flowmap *);
size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
                      const struct miniflow *src);
void miniflow_init(struct miniflow *, const struct flow *);
void miniflow_clone(struct miniflow *, const struct miniflow *,
                    size_t n_values);
struct miniflow * miniflow_create(const struct flow *);

void miniflow_expand(const struct miniflow *, struct flow *);

/* Returns the 64-bit unit at position 'index' of 'flow', viewing the flow as
 * an array of uint64_t.  'index' is not range checked. */
static inline uint64_t flow_u64_value(const struct flow *flow, size_t index)
{
    const uint64_t *units = (const uint64_t *) flow;

    return units[index];
}

/* Returns a pointer to the 64-bit unit at position 'index' of 'flow', viewing
 * the flow as an array of uint64_t.  'index' is not range checked. */
static inline uint64_t *flow_u64_lvalue(struct flow *flow, size_t index)
{
    uint64_t *units = (uint64_t *) flow;

    return units + index;
}

/* Returns the number of values that follow 'flow' in memory, which is the
 * number of 1-bits in its map. */
static inline size_t
miniflow_n_values(const struct miniflow *flow)
{
    const unsigned int n_values = flowmap_n_1bits(flow->map);

    return n_values;
}

/* Iteration state for FLOW_FOR_EACH_IN_MAPS(). */
struct flow_for_each_in_maps_aux {
    const struct flow *flow;        /* Flow whose units are read. */
    struct flowmap_aux map_aux;     /* Remaining unit indices to visit. */
};

/* Advances 'aux' to the next unit index selected by its map.  If there is
 * one, stores the corresponding 64-bit unit of 'aux->flow' in '*value' and
 * returns true; otherwise returns false and leaves '*value' untouched. */
static inline bool
flow_values_get_next_in_maps(struct flow_for_each_in_maps_aux *aux,
                             uint64_t *value)
{
    size_t u64_idx;

    if (!flowmap_next_index(&aux->map_aux, &u64_idx)) {
        return false;
    }

    *value = flow_u64_value(aux->flow, u64_idx);
    return true;
}

/* Iterate through all flow u64 values specified by 'MAPS'.
 *
 * 'VALUE' must be a caller-declared uint64_t variable (its address is passed
 * to flow_values_get_next_in_maps()), 'FLOW' a pointer to the struct flow to
 * read, and 'MAPS' a struct flowmap, which is copied into the loop state. */
#define FLOW_FOR_EACH_IN_MAPS(VALUE, FLOW, MAPS)            \
    for (struct flow_for_each_in_maps_aux aux__             \
             = { (FLOW), FLOWMAP_AUX_INITIALIZER(MAPS) };   \
         flow_values_get_next_in_maps(&aux__, &(VALUE));)

/* Iteration state for mf_get_next_in_map(). */
struct mf_for_each_in_map_aux {
    size_t unit;             /* Current 64-bit unit of the flowmaps
                                being processed. */
    struct flowmap fmap;     /* Remaining 1-bits corresponding to the
                                64-bit words in 'values' */
    struct flowmap map;      /* Remaining 1-bits corresponding to the
                                64-bit words of interest. */
    const uint64_t *values;  /* 64-bit words corresponding to the
                                1-bits in 'fmap'. */
};

/* Get the data from 'aux->values' corresponding to the next lowest 1-bit
 * in 'aux->map', given that 'aux->values' points to an array of 64-bit
 * words corresponding to the 1-bits in 'aux->fmap', starting from the
 * rightmost 1-bit.
 *
 * Returns 'true' if the traversal is incomplete, 'false' otherwise.
 * 'aux' is prepared for the next iteration after each call.  When 'false' is
 * returned, '*value' is not written.
 *
 * This is used to traverse through, for example, the values in a miniflow
 * representation of a flow key selected by non-zero 64-bit words in a
 * corresponding subtable mask. */
static inline bool
mf_get_next_in_map(struct mf_for_each_in_map_aux *aux,
                   uint64_t *value)
{
    map_t *map, *fmap;
    map_t rm1bit;

    /* Skip empty map units. */
    while (OVS_UNLIKELY(!*(map = &aux->map.bits[aux->unit]))) {
        /* Skip remaining data in the current unit before advancing
         * to the next. */
        aux->values += count_1bits(aux->fmap.bits[aux->unit]);
        if (++aux->unit == FLOWMAP_UNITS) {
            return false;
        }
    }

    /* Consume the lowest 1-bit of the current unit of 'aux->map'. */
    rm1bit = rightmost_1bit(*map);
    *map -= rm1bit;
    fmap = &aux->fmap.bits[aux->unit];

    /* If the rightmost 1-bit found from the current unit in 'aux->map'
     * ('rm1bit') is also present in 'aux->fmap', store the corresponding
     * value from 'aux->values' to '*value', otherwise store 0. */
    if (OVS_LIKELY(*fmap & rm1bit)) {
        /* Skip all 64-bit words in 'values' preceding the one corresponding
         * to 'rm1bit'. */
        map_t trash = *fmap & (rm1bit - 1);

        /* Avoid resetting 'fmap' and calling count_1bits() when trash is
         * zero. */
        if (trash) {
            *fmap -= trash;
            aux->values += count_1bits(trash);
        }

        /* 'rm1bit' itself stays set in '*fmap' and 'aux->values' is not
         * advanced past this word here; a later call skips it as 'trash' or
         * as part of the unit's remaining data. */
        *value = *aux->values;
    } else {
        *value = 0;
    }
    return true;
}

/* Iterate through miniflow u64 values specified by 'FLOWMAP'.
 *
 * 'VALUE' must be a caller-declared uint64_t variable, 'FLOW' a pointer to a
 * struct miniflow and 'FLOWMAP' a struct flowmap.  For each 1-bit of
 * 'FLOWMAP', 'VALUE' receives the matching value of 'FLOW', or 0 if that bit
 * is not set in 'FLOW's own map (see mf_get_next_in_map()). */
#define MINIFLOW_FOR_EACH_IN_FLOWMAP(VALUE, FLOW, FLOWMAP)          \
    for (struct mf_for_each_in_map_aux aux__ =                      \
        { 0, (FLOW)->map, (FLOWMAP), miniflow_get_values(FLOW) };   \
         mf_get_next_in_map(&aux__, &(VALUE));)

/* Returns a pointer to the element of 'values' that corresponds to bit 'idx'
 * of 'map', by skipping one element for every 1-bit of 'map' below 'idx'.
 *
 * This can be used when it is known that 'idx' is set in 'map'.  'idx' must
 * be less than MAP_T_BITS. */
static inline const uint64_t *
miniflow_values_get__(const uint64_t *values, map_t map, size_t idx)
{
    const map_t lower_bits = map & ((MAP_1 << idx) - 1);

    return &values[count_1bits(lower_bits)];
}

/* Returns a pointer to the value of 'mf' that corresponds to bit 'idx' of its
 * map.
 *
 * This can be used when it is known that 'idx' is set in the map of 'mf'. */
static inline const uint64_t *
miniflow_get__(const struct miniflow *mf, size_t idx)
{
    const uint64_t *values = miniflow_get_values(mf);
    size_t unit = 0;

    /* Skip the values that belong to whole units preceding the one that
     * contains 'idx'. */
    for (; idx >= MAP_T_BITS; idx -= MAP_T_BITS) {
        values += count_1bits(mf->map.bits[unit++]);
    }
    return miniflow_values_get__(values, mf->map.bits[unit], idx);
}

/* True if bit 'IDX' is set in the map of the miniflow pointed to by 'MF'. */
#define MINIFLOW_IN_MAP(MF, IDX) flowmap_is_set(&(MF)->map, IDX)

/* Get the value of the struct flow 'FIELD' as up to 8 byte wide integer type
 * 'TYPE' from miniflow 'MF'.
 *
 * Checks at build time that 'TYPE' has the same size as 'FIELD' and, through
 * BUILD_ASSERT_GCCONLY, that the two types are compatible. */
#define MINIFLOW_GET_TYPE(MF, TYPE, FIELD)                              \
    (BUILD_ASSERT(sizeof(TYPE) == sizeof(((struct flow *)0)->FIELD)),   \
     BUILD_ASSERT_GCCONLY(__builtin_types_compatible_p(TYPE, typeof(((struct flow *)0)->FIELD))), \
     MINIFLOW_GET_TYPE__(MF, TYPE, FIELD))

/* Like MINIFLOW_GET_TYPE, but without checking that TYPE is the correct width
 * for FIELD.  (This is useful for deliberately reading adjacent fields in one
 * go.)
 *
 * Yields 0 if the 64-bit unit in which 'FIELD' starts is not present in
 * 'MF's map.  Otherwise that unit is viewed as an array of 'TYPE' and the
 * element at 'FIELD's in-unit offset is read. */
#define MINIFLOW_GET_TYPE__(MF, TYPE, FIELD)                            \
    (MINIFLOW_IN_MAP(MF, FLOW_U64_OFFSET(FIELD))                        \
     ? ((OVS_FORCE const TYPE *)miniflow_get__(MF, FLOW_U64_OFFSET(FIELD))) \
     [FLOW_U64_OFFREM(FIELD) / sizeof(TYPE)]                            \
     : 0)

/* Get the 128-bit value of struct flow 'FIELD' from miniflow 'FLOW' as an
 * ovs_u128.  The two initializers of '.u64' are, in order, the 64-bit unit in
 * which 'FIELD' starts and the unit after it; each is taken as 0 if it is not
 * present in 'FLOW's map. */
#define MINIFLOW_GET_U128(FLOW, FIELD)                                  \
    (ovs_u128) { .u64 = {                                               \
            (MINIFLOW_IN_MAP(FLOW, FLOW_U64_OFFSET(FIELD)) ?            \
             *miniflow_get__(FLOW, FLOW_U64_OFFSET(FIELD)) : 0),        \
            (MINIFLOW_IN_MAP(FLOW, FLOW_U64_OFFSET(FIELD) + 1) ?        \
             *miniflow_get__(FLOW, FLOW_U64_OFFSET(FIELD) + 1) : 0) } }

/* Type-specific shorthands for MINIFLOW_GET_TYPE().  Each reads struct flow
 * member 'FIELD' from miniflow 'FLOW' as the type named in the macro, with
 * the same build-time type checks. */
#define MINIFLOW_GET_U8(FLOW, FIELD)            \
    MINIFLOW_GET_TYPE(FLOW, uint8_t, FIELD)
#define MINIFLOW_GET_U16(FLOW, FIELD)           \
    MINIFLOW_GET_TYPE(FLOW, uint16_t, FIELD)
#define MINIFLOW_GET_BE16(FLOW, FIELD)          \
    MINIFLOW_GET_TYPE(FLOW, ovs_be16, FIELD)
#define MINIFLOW_GET_U32(FLOW, FIELD)           \
    MINIFLOW_GET_TYPE(FLOW, uint32_t, FIELD)
#define MINIFLOW_GET_BE32(FLOW, FIELD)          \
    MINIFLOW_GET_TYPE(FLOW, ovs_be32, FIELD)
#define MINIFLOW_GET_U64(FLOW, FIELD)           \
    MINIFLOW_GET_TYPE(FLOW, uint64_t, FIELD)
#define MINIFLOW_GET_BE64(FLOW, FIELD)          \
    MINIFLOW_GET_TYPE(FLOW, ovs_be64, FIELD)

/* Forward declarations of the inline miniflow accessors defined further down
 * in this header. */
static inline uint64_t miniflow_get(const struct miniflow *,
                                    unsigned int u64_ofs);
static inline uint32_t miniflow_get_u32(const struct miniflow *,
                                        unsigned int u32_ofs);
static inline ovs_be32 miniflow_get_be32(const struct miniflow *,
                                         unsigned int be32_ofs);
static inline uint16_t miniflow_get_vid(const struct miniflow *, size_t);
static inline uint16_t miniflow_get_tcp_flags(const struct miniflow *);
static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
static inline uint64_t miniflow_get_tun_metadata_present_map(
    const struct miniflow *);
static inline uint32_t miniflow_get_recirc_id(const struct miniflow *);
static inline uint32_t miniflow_get_dp_hash(const struct miniflow *);
static inline ovs_be32 miniflow_get_ports(const struct miniflow *);

/* Miniflow comparison and hashing.  Declarations only. */
bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
bool miniflow_equal_in_minimask(const struct miniflow *a,
                                const struct miniflow *b,
                                const struct minimask *);
bool miniflow_equal_flow_in_minimask(const struct miniflow *a,
                                     const struct flow *b,
                                     const struct minimask *);
uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis);
uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis);


/* Compressed flow wildcards. */

/* A sparse representation of a "struct flow_wildcards".
 *
 * See the large comment on struct miniflow for details.
 *
 * Note: While miniflow can have zero data for a 1-bit in the map,
 * a minimask may not!  We rely on this in the implementation. */
struct minimask {
    struct miniflow masks;      /* The mask bits, stored as a miniflow. */
};

/* Construction, combination and expansion of minimasks.  Declarations only.
 * minimask_combine() takes caller-provided 'storage' of FLOW_U64S 64-bit
 * units. */
void minimask_init(struct minimask *, const struct flow_wildcards *);
struct minimask * minimask_create(const struct flow_wildcards *);
void minimask_combine(struct minimask *dst,
                      const struct minimask *a, const struct minimask *b,
                      uint64_t storage[FLOW_U64S]);

void minimask_expand(const struct minimask *, struct flow_wildcards *);

/* Forward declarations of the inline minimask accessors defined further down
 * in this header. */
static inline uint32_t minimask_get_u32(const struct minimask *,
                                        unsigned int u32_ofs);
static inline ovs_be32 minimask_get_be32(const struct minimask *,
                                         unsigned int be32_ofs);
static inline uint16_t minimask_get_vid_mask(const struct minimask *, size_t);
static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);

/* Minimask comparison.  Declarations only. */
bool minimask_equal(const struct minimask *a, const struct minimask *b);
bool minimask_has_extra(const struct minimask *, const struct minimask *);


/* Returns true if 'mask' matches every packet, false if 'mask' fixes any bits
 * or fields. */
static inline bool
minimask_is_catchall(const struct minimask *mask)
{
    /* For every 1-bit in mask's map, the corresponding value is non-zero,
     * so the only way the mask can not fix any bits or fields is for the
     * map the be zero. */
    return flowmap_is_empty(mask->masks.map);
}

/* Returns the uint64_t that would be at byte offset '8 * u64_ofs' if 'flow'
 * were expanded into a "struct flow".  Units that are not present in 'flow's
 * map read as 0. */
static inline uint64_t miniflow_get(const struct miniflow *flow,
                                    unsigned int u64_ofs)
{
    if (!MINIFLOW_IN_MAP(flow, u64_ofs)) {
        return 0;
    }
    return *miniflow_get__(flow, u64_ofs);
}

/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'flow'
 * were expanded into a "struct flow".  The containing 64-bit unit is fetched
 * with miniflow_get() and the half that holds the requested word is selected
 * according to the host byte order. */
static inline uint32_t miniflow_get_u32(const struct miniflow *flow,
                                        unsigned int u32_ofs)
{
    const uint64_t unit = miniflow_get(flow, u32_ofs / 2);
    /* Whether the requested word is the second 32-bit word of its unit. */
    const bool second_word = (u32_ofs & 1) != 0;

#if WORDS_BIGENDIAN
    const unsigned int shift = second_word ? 0 : 32;
#else
    const unsigned int shift = second_word ? 32 : 0;
#endif

    return (uint32_t) (unit >> shift);
}

static inline ovs_be32 miniflow_get_be32(const struct miniflow *flow,
                                         unsigned int be32_ofs)
{
    return (OVS_FORCE ovs_be32)miniflow_get_u32(flow, be32_ofs);
}

/* Returns the VID within the vlan_tci member of the "struct flow" represented
 * by 'flow'. */
static inline uint16_t
miniflow_get_vid(const struct miniflow *flow, size_t n)
{
    if (n < FLOW_MAX_VLAN_HEADERS) {
        union flow_vlan_hdr hdr = {
            .qtag = MINIFLOW_GET_BE32(flow, vlans[n].qtag)
        };
        return vlan_tci_to_vid(hdr.tci);
    }
    return 0;
}

/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'mask'
 * were expanded into a "struct flow_wildcards". */
static inline uint32_t
minimask_get_u32(const struct minimask *mask, unsigned int u32_ofs)
{
    const struct miniflow *masks = &mask->masks;

    return miniflow_get_u32(masks, u32_ofs);
}

static inline ovs_be32
minimask_get_be32(const struct minimask *mask, unsigned int be32_ofs)
{
    return (OVS_FORCE ovs_be32)minimask_get_u32(mask, be32_ofs);
}

/* Returns the VID mask within the TCI of VLAN header number 'n' of the
 * "struct flow_wildcards" represented by 'mask'.  Delegates to
 * miniflow_get_vid() on the mask's miniflow. */
static inline uint16_t
minimask_get_vid_mask(const struct minimask *mask, size_t n)
{
    const struct miniflow *masks = &mask->masks;

    return miniflow_get_vid(masks, n);
}

/* Returns the value of the "tcp_flags" field in 'flow'. */
static inline uint16_t
miniflow_get_tcp_flags(const struct miniflow *flow)
{
    return ntohs(MINIFLOW_GET_BE16(flow, tcp_flags));
}

/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow', in
 * network byte order. */
static inline ovs_be64
miniflow_get_metadata(const struct miniflow *flow)
{
    const ovs_be64 value = MINIFLOW_GET_BE64(flow, metadata);

    return value;
}

/* Returns the bitmap that indicates which tunnel metadata fields are present
 * in 'flow', read from 'tunnel.metadata.present.map'. */
static inline uint64_t
miniflow_get_tun_metadata_present_map(const struct miniflow *flow)
{
    const uint64_t present = MINIFLOW_GET_U64(flow,
                                              tunnel.metadata.present.map);

    return present;
}

/* Returns the 'recirc_id' field of the flow represented by 'flow'. */
static inline uint32_t
miniflow_get_recirc_id(const struct miniflow *flow)
{
    const uint32_t id = MINIFLOW_GET_U32(flow, recirc_id);

    return id;
}

/* Returns the dp_hash in 'flow.' */
static inline uint32_t
miniflow_get_dp_hash(const struct miniflow *flow)
{
    return MINIFLOW_GET_U32(flow, dp_hash);
}

/* Returns the 'tp_src' and 'tp_dst' fields together as one piece of data.
 *
 * Deliberately uses the unchecked MINIFLOW_GET_TYPE__() to read 32 bits
 * starting at 'tp_src', which is narrower than ovs_be32 on its own. */
static inline ovs_be32
miniflow_get_ports(const struct miniflow *flow)
{
    const ovs_be32 ports = MINIFLOW_GET_TYPE__(flow, ovs_be32, tp_src);

    return ports;
}

/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
 *
 * The return value is all-1-bits if 'mask' matches on the whole value of the
 * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field,
 * or some other value if the metadata field is partially matched, partially
 * wildcarded. */
static inline ovs_be64
minimask_get_metadata_mask(const struct minimask *mask)
{
    return MINIFLOW_GET_BE64(&mask->masks, metadata);
}

/* ORs into 'dst' the data that miniflow 'src' carries for the fields selected
 * by 'subset'; 64-bit units of 'dst' not selected by 'subset' are untouched.
 * 'subset' must be a subset of 'src's map.
 *
 * NOTE(review): values are taken one after another starting from the first
 * value of 'src', one per bit set in 'subset'.  That looks like it assumes
 * 'subset' selects a leading run of 'src's map -- confirm against callers. */
static inline void
flow_union_with_miniflow_subset(struct flow *dst, const struct miniflow *src,
                                struct flowmap subset)
{
    const uint64_t *value = miniflow_get_values(src);
    uint64_t *unit_base = (uint64_t *) dst;
    map_t unit;

    FLOWMAP_FOR_EACH_MAP (unit, subset) {
        size_t bit;

        /* 'bit' indexes a 64-bit word relative to the current map unit. */
        MAP_FOR_EACH_INDEX(bit, unit) {
            unit_base[bit] |= *value;
            value++;
        }

        /* Step 'dst' forward to the words covered by the next map unit. */
        unit_base += MAP_T_BITS;
    }
}

/* Perform a bitwise OR of miniflow 'src' flow data with the equivalent
 * fields in 'dst', storing the result in 'dst'. */
static inline void
flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
{
    flow_union_with_miniflow_subset(dst, src, src->map);
}

/* Returns true if the conntrack state in 'flow' denotes a valid connection
 * tracking entry.
 *
 * With 'mask' nonnull and 'wc' null, 'flow' is treated as a match and 'mask'
 * as its wildcards.  Otherwise 'flow' is treated as a fully extracted flow
 * and, if 'wc' is nonnull, the ct_state bits examined are unwildcarded in
 * 'wc'. */
static inline bool
is_ct_valid(const struct flow *flow, const struct flow_wildcards *mask,
            struct flow_wildcards *wc)
{
    /* Matches are checked with 'mask' and without 'wc'. */
    if (mask && !wc) {
        /* Any one of these bits implies a valid conntrack entry. */
        if (flow->ct_state & (CS_NEW | CS_ESTABLISHED | CS_RELATED
                              | CS_REPLY_DIR | CS_SRC_NAT | CS_DST_NAT)) {
            return true;
        }

        /* Otherwise accept only an explicit not-invalid: tracked, with the
         * 'invalid' bit matched on and clear. */
        return (flow->ct_state & CS_TRACKED) != 0
               && (mask->masks.ct_state & CS_INVALID) != 0
               && (flow->ct_state & CS_INVALID) == 0;
    }

    /* A fully extracted flow with valid CT state always has at least one of
     * the 'new', 'established', or 'reply_dir' bits set. */
#define CS_VALID_MASK (CS_NEW | CS_ESTABLISHED | CS_REPLY_DIR)
    if (wc) {
        wc->masks.ct_state |= CS_VALID_MASK;
    }
    return (flow->ct_state & CS_VALID_MASK) != 0;
}

/* Initializes the packet metadata 'md' from the corresponding fields of
 * 'flow'.
 *
 * The conntrack original-direction tuple is copied only when 'flow' has a
 * nonzero dl_type, valid conntrack state according to is_ct_valid(), and an
 * IPv4 or IPv6 dl_type.  In every other case the tuple is zeroed. */
static inline void
pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
{
    /* Update this function whenever struct flow changes. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);

    md->recirc_id = flow->recirc_id;
    md->dp_hash = flow->dp_hash;
    flow_tnl_copy__(&md->tunnel, &flow->tunnel);
    md->skb_priority = flow->skb_priority;
    md->pkt_mark = flow->pkt_mark;
    md->in_port = flow->in_port;
    md->ct_state = flow->ct_state;
    md->ct_zone = flow->ct_zone;
    md->ct_mark = flow->ct_mark;
    md->ct_label = flow->ct_label;

    bool have_tuple = flow->dl_type && is_ct_valid(flow, NULL, NULL);

    /* Only the IPv6 branch below flips this flag to true. */
    md->ct_orig_tuple_ipv6 = false;
    if (have_tuple && flow->dl_type == htons(ETH_TYPE_IP)) {
        md->ct_orig_tuple.ipv4 = (struct ovs_key_ct_tuple_ipv4) {
            flow->ct_nw_src,
            flow->ct_nw_dst,
            flow->ct_tp_src,
            flow->ct_tp_dst,
            flow->ct_nw_proto,
        };
    } else if (have_tuple && flow->dl_type == htons(ETH_TYPE_IPV6)) {
        md->ct_orig_tuple_ipv6 = true;
        md->ct_orig_tuple.ipv6 = (struct ovs_key_ct_tuple_ipv6) {
            flow->ct_ipv6_src,
            flow->ct_ipv6_dst,
            flow->ct_tp_src,
            flow->ct_tp_dst,
            flow->ct_nw_proto,
        };
    } else {
        /* No usable tuple, or a non-IP dl_type: reset ct_orig_tuple. */
        memset(&md->ct_orig_tuple, 0, sizeof md->ct_orig_tuple);
    }
}

/* Often, during translation we need to read a value from a flow ('FLOW') and
 * unwildcard the corresponding bits in the wildcards ('WC').  This macro makes
 * it easier to do that.
 *
 * Expands to a comma expression whose value is (FLOW)->FIELD.  When 'WC' is
 * nonnull, WC_MASK_FIELD(WC, FIELD) is evaluated first; when 'WC' is null the
 * wildcards are left alone.  'WC' appears more than once in the expansion, so
 * pass an expression without side effects. */

#define FLOW_WC_GET_AND_MASK_WC(FLOW, WC, FIELD) \
    (((WC) ? WC_MASK_FIELD(WC, FIELD) : NULL), ((FLOW)->FIELD))

static inline bool is_ethernet(const struct flow *flow,
                               struct flow_wildcards *wc)
{
    if (wc) {
        WC_MASK_FIELD(wc, packet_type);
    }
    return flow->packet_type == htonl(PT_ETH);
}

static inline ovs_be16 get_dl_type(const struct flow *flow)
{
    if (flow->packet_type == htonl(PT_ETH)) {
        return flow->dl_type;
    } else if (pt_ns(flow->packet_type) == OFPHTN_ETHERTYPE) {
        return pt_ns_type_be(flow->packet_type);
    } else {
        return htons(FLOW_DL_TYPE_NONE);
    }
}

static inline bool is_vlan(const struct flow *flow,
                           struct flow_wildcards *wc)
{
    if (!is_ethernet(flow, wc)) {
        return false;
    }
    if (wc) {
        WC_MASK_FIELD_MASK(wc, vlans[0].tci, htons(VLAN_CFI));
    }
    return (flow->vlans[0].tci & htons(VLAN_CFI)) != 0;
}

static inline bool is_ip_any(const struct flow *flow)
{
    return dl_type_is_ip_any(get_dl_type(flow));
}

/* Returns true if 'flow' is an IP flow (per is_ip_any()) whose nw_proto
 * equals 'ip_proto'.  For IP flows only, a nonnull 'wc' has nw_proto
 * unwildcarded to account for the test. */
static inline bool
is_ip_proto(const struct flow *flow, uint8_t ip_proto,
            struct flow_wildcards *wc)
{
    if (!is_ip_any(flow)) {
        return false;
    }

    if (wc) {
        WC_MASK_FIELD(wc, nw_proto);
    }
    return flow->nw_proto == ip_proto;
}

/* Returns true if 'flow' is an IP flow carrying IPPROTO_TCP.  'wc', if
 * nonnull, is updated as described for is_ip_proto(). */
static inline bool
is_tcp(const struct flow *flow, struct flow_wildcards *wc)
{
    const uint8_t proto = IPPROTO_TCP;

    return is_ip_proto(flow, proto, wc);
}

/* Returns true if 'flow' is an IP flow carrying IPPROTO_UDP.  'wc', if
 * nonnull, is updated as described for is_ip_proto(). */
static inline bool
is_udp(const struct flow *flow, struct flow_wildcards *wc)
{
    const uint8_t proto = IPPROTO_UDP;

    return is_ip_proto(flow, proto, wc);
}

/* Returns true if 'flow' is an IP flow carrying IPPROTO_SCTP.  'wc', if
 * nonnull, is updated as described for is_ip_proto(). */
static inline bool
is_sctp(const struct flow *flow, struct flow_wildcards *wc)
{
    const uint8_t proto = IPPROTO_SCTP;

    return is_ip_proto(flow, proto, wc);
}

/* Returns true if 'flow' has Ethertype ETH_TYPE_IP (per get_dl_type()) and
 * nw_proto IPPROTO_ICMP.  For IPv4 flows only, a nonnull 'wc' has nw_proto
 * fully unwildcarded. */
static inline bool
is_icmpv4(const struct flow *flow, struct flow_wildcards *wc)
{
    if (get_dl_type(flow) != htons(ETH_TYPE_IP)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    }
    return flow->nw_proto == IPPROTO_ICMP;
}

/* Returns true if 'flow' has Ethertype ETH_TYPE_IPV6 (per get_dl_type()) and
 * nw_proto IPPROTO_ICMPV6.  For IPv6 flows only, a nonnull 'wc' has nw_proto
 * fully unwildcarded. */
static inline bool
is_icmpv6(const struct flow *flow, struct flow_wildcards *wc)
{
    if (get_dl_type(flow) != htons(ETH_TYPE_IPV6)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    }
    return flow->nw_proto == IPPROTO_ICMPV6;
}

/* Returns true if 'flow' is an ICMPv6 flow with tp_dst equal to zero and
 * tp_src equal to ND_NEIGHBOR_SOLICIT or ND_NEIGHBOR_ADVERT.
 *
 * If 'wc' is nonnull, each field is unwildcarded just before it is examined,
 * so tp_src is unwildcarded only when the tp_dst test has passed. */
static inline bool
is_nd(const struct flow *flow, struct flow_wildcards *wc)
{
    if (!is_icmpv6(flow, wc)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
    }
    if (flow->tp_dst != htons(0)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
    }
    return (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)
            || flow->tp_src == htons(ND_NEIGHBOR_ADVERT));
}

static inline bool is_arp(const struct flow *flow)
{
    return (flow->dl_type == htons(ETH_TYPE_ARP));
}

static inline bool is_garp(const struct flow *flow,
                           struct flow_wildcards *wc)
{
    if (is_arp(flow)) {
        return (FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src) ==
                FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_dst));
    }

    return false;
}

/* Returns true if 'flow' has Ethertype ETH_TYPE_IP (per get_dl_type()) and
 * nw_proto IPPROTO_IGMP.  For IPv4 flows only, a nonnull 'wc' has nw_proto
 * fully unwildcarded. */
static inline bool
is_igmp(const struct flow *flow, struct flow_wildcards *wc)
{
    if (get_dl_type(flow) != htons(ETH_TYPE_IP)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    }
    return flow->nw_proto == IPPROTO_IGMP;
}

static inline bool is_mld(const struct flow *flow,
                          struct flow_wildcards *wc)
{
    if (is_icmpv6(flow, wc)) {
        if (wc) {
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
        }
        return (flow->tp_src == htons(MLD_QUERY)
                || flow->tp_src == htons(MLD_REPORT)
                || flow->tp_src == htons(MLD_DONE)
                || flow->tp_src == htons(MLD2_REPORT));
    }
    return false;
}

/* Returns true if 'flow' is an ICMPv6 flow whose tp_src is MLD_QUERY.  For
 * ICMPv6 flows only, a nonnull 'wc' has tp_src fully unwildcarded. */
static inline bool
is_mld_query(const struct flow *flow, struct flow_wildcards *wc)
{
    if (!is_icmpv6(flow, wc)) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
    }
    return flow->tp_src == htons(MLD_QUERY);
}

/* Returns true if 'flow' is an MLD flow (per is_mld()) that is not an MLD
 * query (per is_mld_query()).  'wc', if nonnull, is updated by those two
 * helpers; is_mld_query() is consulted only when is_mld() succeeds. */
static inline bool
is_mld_report(const struct flow *flow, struct flow_wildcards *wc)
{
    if (!is_mld(flow, wc)) {
        return false;
    }
    return !is_mld_query(flow, wc);
}

/* Returns true if 'flow' has dl_type FLOW_DL_TYPE_NONE and a dl_dst equal to
 * eth_addr_stp.  The address is compared only when the dl_type test passes. */
static inline bool
is_stp(const struct flow *flow)
{
    if (flow->dl_type != htons(FLOW_DL_TYPE_NONE)) {
        return false;
    }
    return eth_addr_equals(flow->dl_dst, eth_addr_stp);
}

/* Returns true if flow->tp_dst equals 'port', where 'port' is in host byte
 * order.  If 'wc' is nonnull, sets appropriate bits in wc->masks.tp_dst to
 * account for the test:
 *
 *   - on a mismatch, only the most significant differing bit is added to the
 *     mask;
 *   - on a match, the whole field is unwildcarded.
 *
 * The caller must already have ensured that 'flow' is a protocol for which
 * tp_dst is relevant. */
static inline bool
tp_dst_equals(const struct flow *flow, uint16_t port,
              struct flow_wildcards *wc)
{
    uint16_t mismatch = port ^ ntohs(flow->tp_dst);

    if (wc && mismatch) {
        /* Shifting the 16-bit difference to the top of a 64-bit word makes
         * the leading-zero count equal to the bit offset from the MSB. */
        int msb_ofs = raw_clz64((uint64_t) mismatch << 48); /* range [0,15] */

        wc->masks.tp_dst |= htons(0x8000 >> msb_ofs);
    } else if (wc) {
        /* Must match all bits. */
        wc->masks.tp_dst = OVS_BE16_MAX;
    }
    return mismatch == 0;
}

#endif /* flow.h */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/*
-												flow: New function ct_state_from_string().

This will have its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Miguel Angel Ajo <majopela@redhat.com>

											
										
										
											2017-04-17 16:06:35 -07:00
+								 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 */
 								#ifndef FLOW_H
 								#define FLOW_H 1
-												Work around bugs in system headers.

On some systems, at least, one must include <sys/types.h> before
<netinet/in.h>, and <netinet/in.h> before <arpa/inet.h> or <net/if.h>.

From Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-12 12:51:36 -08:00
+								#include <sys/types.h>
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include <netinet/in.h>
-												flow: New function is_nd().

This simplifies a few pieces of code and will acquire another user in an
upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-02 11:35:29 -07:00
+								#include <netinet/icmp6.h>
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include <stdbool.h>
 								#include <stdint.h>
 								#include <string.h>
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								#include "bitmap.h"
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								#include "byte-order.h"
-												Break flow.h into private and public parts

Public (struct definitions and some prototypes) go in
include/openvswitch

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-04-04 21:32:03 -04:00
+								#include "openvswitch/compiler.h"
-												tunneling: Add support for tunnel ID.

Add a tun_id field which contains the ID of the encapsulating tunnel
on which a packet was received (0 if not received on a tunnel).  Also
add an action which allows the tunnel ID to be set for outgoing
packets.  At this point there aren't any tunnel implementations so
these fields don't have any effect.

The matching is exposed to OpenFlow by overloading the high 32 bits
of the cookie as the tunnel ID.  ovs-ofctl is capable of turning
on this special behavior using a new "tun-cookie" command but this
command is intentionally undocumented to avoid it being used without
a full understanding of the consequences.

											
										
										
											2010-04-12 11:49:16 -04:00
+								#include "openflow/nicira-ext.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include "openflow/openflow.h"
-												Break flow.h into private and public parts

Public (struct definitions and some prototypes) go in
include/openvswitch

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-04-04 21:32:03 -04:00
+								#include "openvswitch/flow.h"
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								#include "packets.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include "hash.h"
 								#include "util.h"
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								struct dpif_flow_stats;
-												dpctl: Properly reflect a rule's offloaded to HW state

Previously, any rule that is offloaded via a netdev, not necessarily
to the HW, would be reported as "offloaded". This patch fixes this
misalignment, and introduces the 'dp' state, as follows:

rule is in HW via TC offload  -> offloaded=yes dp:tc
rule is in not HW over TC DP  -> offloaded=no  dp:tc
rule is in not HW over OVS DP -> offloaded=no  dp:ovs

To achieve this, the flows's 'offloaded' flag was encapsulated in a new
attrs struct, which contains the offloaded state of the flow and the
DP layer the flow is handled in, and instead of setting the flow's
'offloaded' state based solely on the type of dump it was acquired
via, for netdev flows it now sends the new attrs struct to be
collected along with the rest of the flow via the netdev, allowing
it to be set per flow.

For TC offloads, the offloaded state is set based on the 'in_hw' and
'not_in_hw' flags received from the TC as part of the flower. If no
such flag was received, due to lack of kernel support, it defaults
to true.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
[simon: resolved conflict in lib/dpctl.man]
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-06-07 09:36:59 +03:00
+								struct dpif_flow_attrs;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								struct ds;
-												flow: Better abstract flow_wildcards and use it more widely.

											
										
										
											2010-11-08 10:37:35 -08:00
+								struct flow_wildcards;
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								struct minimask;
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								struct dp_packet;
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								struct ofputil_port_map;
-												lib: simplify flow_extract() API

Change the flow_extract() API to accept struct pkt_metadata,
instead of individual metadata fields. It will make the API more
logical and easier to maintain when we need to expand metadata
down the road.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-02-26 18:08:04 -08:00
+								struct pkt_metadata;
-												ofp-util: Convert flow_metadata to match structure.

We have a special flow_metadata structure to represent the parts
of a packet that aren't carried in the payload itself. This is
used in the case where we need to send the packet as a Packet In
to an OpenFlow controller. This is a subset of the more general
struct flow.

In practice, almost all operations we do on this structure involve
converting it to or from a match or have code that is the same as
a match. Serialization to NXM and back is done as a match. There
is special flow_metadata formatting code that is almost identical
to match formatting.

The uses for struct flow_metadata aren't performance critical
when it comes to memory, so we can save quite a bit of code by
just using a match structure directly instead. In addition, as
metadata increases and becomes more complex (Geneve options require
some special handling beyond just additional fields), using the
match structure means we only have to do this work in one place.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-05-15 17:03:17 -07:00
+								struct match;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												dpif-netdev: reduce netdev_flow_key size

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-09-20 07:30:02 +00:00
+								/* Some flow fields are mutually exclusive or only appear within the flow
 								 * pipeline.  IPv6 headers are bigger than IPv4 and MPLS, and IPv6 ND packets
 								 * are bigger than TCP,UDP and IGMP packets. */
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								#define FLOW_MAX_PACKET_U64S (FLOW_U64S                                   \
 								    /* Unused in datapath */  - FLOW_U64_SIZE(regs)                       \
 								                              - FLOW_U64_SIZE(metadata)                   \
 								    /* L2.5/3 */              - FLOW_U64_SIZE(nw_src)  /* incl. nw_dst */ \
 								                              - FLOW_U64_SIZE(mpls_lse)                   \
 								    /* L4 */                  - FLOW_U64_SIZE(tp_src)                     \
-												dpif-netdev: reduce netdev_flow_key size

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-09-20 07:30:02 +00:00
+								                             )
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								extern const uint8_t flow_segment_u64s[];
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								/* Configured maximum VLAN headers. */
 								extern int flow_vlan_limit;
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								#define FLOW_U64_OFFSET(FIELD)                          \
 								    (offsetof(struct flow, FIELD) / sizeof(uint64_t))
 								/* Byte offset of 'FIELD' from the start of the 64-bit unit of struct flow
 								 * that contains its first byte, i.e. the remainder of the division whose
 								 * quotient is FLOW_U64_OFFSET(FIELD). */
 								#define FLOW_U64_OFFREM(FIELD)                          \
 								    (offsetof(struct flow, FIELD) % sizeof(uint64_t))
 								/* Number of 64-bit units spanned by a 'FIELD'. */
 								#define FLOW_U64_SIZE(FIELD)                                            \
 								    DIV_ROUND_UP(FLOW_U64_OFFREM(FIELD) + MEMBER_SIZEOF(struct flow, FIELD), \
 								                 sizeof(uint64_t))
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use cases.
This avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								void flow_extract(struct dp_packet *, struct flow *);
-												User-Space MPLS actions and matches

This patch implements user-space datapath and non-datapath code
to match and use the datapath API set out in Leo Alterman's patch
"user-space datapath: Add basic MPLS support to kernel".

The resulting MPLS implementation supports:
* Pushing a single MPLS label
* Popping a single MPLS label
* Modifying an MPLS label using set-field or load actions
  that act on the label value, tc and bos bit.
* There is no support for manipulating the TTL;
  this is considered future work.

The single-level push pop limitation is implemented by processing
push, pop and set-field/load actions in order and discarding information
that would require multiple levels of push/pop to be supported.

e.g.
   push,push -> the first push is discarded
   pop,pop -> the first pop is discarded

This patch is based heavily on work by Ravi K.

Cc: Ravi K <rkerur@gmail.com>
Reviewed-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-25 16:22:07 +09:00
-												classifier: Move zero_wildcards() to flow.c as public flow_zero_wildcards().

This function will soon be used elsewhere.  As it doesn't inherently have
anything to do with the classifier, move it to flow.c.

											
										
										
											2011-08-19 09:39:16 -07:00
+								void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
-												flow: New function flow_unwildcard_tp_ports().

This patch adds a new function flow_unwildcard_tp_ports() which doesn't
unwildcard the upper half of tp_src and tp_dst with ICMP packets.
Unfortunately, this matters in future patches when we compare masks
carefully to determine if flows should be evicted from the datapath.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-06 18:53:12 -08:00
+								void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *);
-												ofp-util: Convert flow_metadata to match structure.

We have a special flow_metadata structure to represent the parts
of a packet that aren't carried in the payload itself. This is
used in the case where we need to send the packet as a Packet In
to an OpenFlow controller. This is a subset of the more general
struct flow.

In practice, almost all operations we do on this structure involve
converting it to or from a match or have code that is the same as
a match. Serialization to NXM and back is done as a match. There
is special flow_metadata formatting code that is almost identical
to match formatting.

The uses for struct flow_metadata aren't performance critical
when it comes to memory, so we can save quite a bit of code by
just using a match structure directly instead. In addition, as
metadata increases and becomes more complex (Geneve options require
some special handling beyond just additional fields), using the
match structure means we only have to do this work in one place.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-05-15 17:03:17 -07:00
+								void flow_get_metadata(const struct flow *, struct match *flow_metadata);
-												dpif-netlink: Detect Out-Of-Resource condition on a netdev

This is the first patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The patch detects OOR condition on a netdev port when ENOSPC error is
returned by TC-Flower while adding a flow rule. A new structure is added
to the netdev called "netdev_hw_info", to store OOR related information
required to perform dynamic offload-rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:12 +05:30
+								struct netdev *flow_get_tunnel_netdev(struct flow_tnl *tunnel);
-												classifier: Move zero_wildcards() to flow.c as public flow_zero_wildcards().

This function will soon be used elsewhere.  As it doesn't inherently have
anything to do with the classifier, move it to flow.c.

											
										
										
											2011-08-19 09:39:16 -07:00
-												Add support for connection tracking.

This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.

Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.

Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.

The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:

- "commit": When commit is executed, the connection moves from
  uncommitted state to committed state. This signals that information
  about the connection should be stored beyond the lifetime of the
  packet within the pipeline. This allows future packets in the same
  connection to be recognized as part of the same "established" (est)
  connection, as well as identifying packets in the reply (rpl)
  direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
  Each zone is an independent connection tracking context. When the
  "commit" parameter is used, the connection will only be committed in
  the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
  of the packet will continue processing the current actions list as an
  untracked packet. An additional instance of the packet will be sent to
  the connection tracker, which will be re-injected into the OpenFlow
  pipeline to resume processing in the specified table, with the
  ct_state and other ct match fields set. If the table is not specified,
  then the packet is submitted to the connection tracker, but the
  pipeline does not fork and the ct match fields are not populated. It
  is strongly recommended to specify a table later than the current
  table to prevent loops.

When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:

- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.

For more information, consult the ovs-ofctl(8) man pages.

Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:

    table=0,priority=1,action=drop
    table=0,arp,action=normal
    table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
    table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
    table=1,in_port=2,ct_state=+trk+est,tcp,action=1
    table=1,in_port=2,ct_state=+trk+new,tcp,action=drop

Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-08-11 10:56:09 -07:00
+								const char *ct_state_to_string(uint32_t state);
-												flow: New function ct_state_from_string().

This will have its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Miguel Angel Ajo <majopela@redhat.com>

											
										
										
											2017-04-17 16:06:35 -07:00
+								uint32_t ct_state_from_string(const char *);
-												conntrack: Move ct_state parsing to lib/flow.c

This patch moves conntrack state parsing function from ovn-trace.c to
lib/flow.c, because it will be used by ofproto/trace unixctl command
later on. It also updates the ct_state checking logic, since we no longer
assume CS_TRACKED is enabled by default.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-27 11:11:32 -07:00
+								bool parse_ct_state(const char *state_str, uint32_t default_state,
 								                    uint32_t *ct_state, struct ds *);
 								bool validate_ct_state(uint32_t state, struct ds *);
-												flow: New function flow_clear_conntrack().

This will have a new user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Miguel Angel Ajo <majopela@redhat.com>

											
										
										
											2017-04-18 11:22:05 -07:00
+								void flow_clear_conntrack(struct flow *);
-												flow: New function ct_state_from_string().

This will have its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Miguel Angel Ajo <majopela@redhat.com>

											
										
										
											2017-04-17 16:06:35 -07:00
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								char *flow_to_string(const struct flow *, const struct ofputil_port_map *);
-												vswitchd: Log all tunnel parameters of given flow.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2012-11-21 18:51:36 -08:00
+								void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
 								                  uint32_t flags, char del);
-												lib: More intuitive syntax for TCP flags matching.

Allow TCP flags match specification with symbolic flag names.  TCP
flags are optionally specified as a string of flag names, each
preceded by '+' when the flag must be one, or '-' when the flag must
be zero.  Any flags not explicitly included are wildcarded.  The
existing hex syntax is still allowed, and is used in flow dumps when
all the flags are matched.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-02 15:14:09 -08:00
+								void format_flags_masked(struct ds *ds, const char *name,
 								                         const char *(*bit_to_string)(uint32_t),
-												flow: Factor out flag parsing and formatting routines.

There are several implementations of functions that parse/format
flags and their binary representation. This factors them out into
common routines. In addition to reducing code, it also makes things
more consistent across different parts of OVS.

Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2015-07-11 20:48:29 -07:00
+								                         uint32_t flags, uint32_t mask, uint32_t max_mask);
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								void format_packet_type_masked(struct ds *, ovs_be32 value, ovs_be32 mask);
-												flow: Factor out flag parsing and formatting routines.

There are several implementations of functions that parse/format
flags and their binary representation. This factors them out into
common routines. In addition to reducing code, it also makes things
more consistent across different parts of OVS.

Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2015-07-11 20:48:29 -07:00
+								int parse_flags(const char *s, const char *(*bit_to_string)(uint32_t),
 								                char end, const char *field_name, char **res_string,
 								                uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask);
-												vswitchd: Log all tunnel parameters of given flow.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2012-11-21 18:51:36 -08:00
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								void flow_format(struct ds *, const struct flow *,
 								                 const struct ofputil_port_map *);
 								void flow_print(FILE *, const struct flow *, const struct ofputil_port_map *);
-												flow: Rename flow_compare() to flow_compare_3way().

I like the _3way suffix convention.  It makes the interpretation of the
return value clear.  We use it elsewhere in the tree, so use it here too.

There weren't any users of flow_compare() outside of flow.h, but there soon
will be.

											
										
										
											2011-10-25 16:33:38 -07:00
+								static inline int flow_compare_3way(const struct flow *, const struct flow *);
-												flow: Get rid of flow_t typedef.

When userspace and the kernel were using the same structure for flows,
flow_t was a useful way to indicate that a structure was really a userspace
flow instead of a kernel one, but now it's better to just write "struct
flow" for consistency, since OVS doesn't use typedefs for structs
elsewhere.

Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-09-03 11:30:02 -07:00
+								static inline bool flow_equal(const struct flow *, const struct flow *);
 								static inline size_t flow_hash(const struct flow *, uint32_t basis);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												flow: Refactor some of VLAN helper functions

By default, these functions change the first vlan vid and pcp
in the flow. Add a parameter as an index for vlans so that we can
also handle the second ones.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-07-17 02:01:56 +00:00
+								void flow_set_dl_vlan(struct flow *, ovs_be16 vid, int id);
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								void flow_fix_vlan_tpid(struct flow *);
-												meta-flow: Add OF1.2-like MFF_VLAN_VID and MFF_VLAN_PCP.

OpenFlow 1.0 and 1.2 have notions of VLAN that are different
enough to warrant separate "meta-flow" fields, which this commit
adds.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Simon Horman <horms@verge.net.au>

											
										
										
											2012-07-22 23:20:22 -07:00
+								void flow_set_vlan_vid(struct flow *, ovs_be16 vid);
-												flow: Refactor some of VLAN helper functions

By default, these functions change the first vlan vid and pcp
in the flow. Add a parameter as an index for vlans so that we can
also handle the second ones.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-07-17 02:01:56 +00:00
+								void flow_set_vlan_pcp(struct flow *, uint8_t pcp, int id);
-												flow: New functions for setting a VLAN VID or PCP value.

											
										
										
											2011-11-21 14:14:02 -08:00
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								void flow_limit_vlans(int vlan_limit);
 								int flow_count_vlan_headers(const struct flow *);
 								void flow_skip_common_vlan_headers(const struct flow *a, int *p_an,
 								                                   const struct flow *b, int *p_bn);
 								void flow_pop_vlan(struct flow*, struct flow_wildcards*);
 								void flow_push_vlan_uninit(struct flow*, struct flow_wildcards*);
-												Enhance userspace support for MPLS, for up to 3 labels.

This commit makes the userspace support for MPLS more complete.  Now
up to 3 labels are supported.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Simon Horman <horms@verge.net.au>

											
										
										
											2014-02-04 10:32:35 -08:00
+								int flow_count_mpls_labels(const struct flow *, struct flow_wildcards *);
 								int flow_count_common_mpls_labels(const struct flow *a, int an,
 								                                  const struct flow *b, int bn,
 								                                  struct flow_wildcards *wc);
 								void flow_push_mpls(struct flow *, int n, ovs_be16 mpls_eth_type,
-												mpls: Fix MPLS restoration after patch port and group bucket.

This patch fixes problems with MPLS handling related to patch ports
and group buckets.

If a group bucket or a peer bridge across a patch port pushes MPLS
headers to a non-MPLS packet and outputs, the flow translation after
returning from the group bucket or patch port would undo the packet
transformations so that the processing could continue with the packet
as it was before entering the patch port.  There were two problems
with this:

1. As part of the first MPLS push on a non-MPLS packet, the flow
translation would first clear the L3/4 headers of the 'flow' to mark
those fields invalid.  Later, when committing 'flow' changes to
datapath actions before output, the necessary datapath MPLS actions
are created and the corresponding changes updated to the 'base flow'.
This was done using the same flow_push_mpls() function that clears
the L2/3 headers, so also the 'base flow' L2/3 headers were cleared.

Then, when translation returns from a patch port or group bucket, the
original 'flow' is restored, now showing no sign of the MPLS labels.
Since the 'base flow' now has the MPLS labels, following translations
know to issue MPLS POP actions before any output actions.  However, as
part of checking for changes to IP headers we test that the IP
protocol type was not changed.  But now the 'base flow's 'nw_proto'
field is zero and an assert fail crashes OVS.

This is solved by not clearing the L3/4 fields of the 'base
flow'. This allows the processing after the patch port to continue
with L3/4 fields as if no MPLS was done, after first issuing the
necessary MPLS POP actions.

2. IP header updates were done before the MPLS POP actions were
issued. This caused incorrect packet output after, e.g., group action
or patch port.  For example, with actions:

group 1234: all bucket=push_mpls,output:LOCAL

ip actions=group:1234,dec_ttl,output:LOCAL,output:LOCAL

the dec_ttl would only be executed before the last output to LOCAL,
since at the time of committing IP changes after the group action the
packet was still an MPLS packet.

This is solved by checking the dl_type of both 'flow' and 'base flow'
and issuing MPLS actions if they can transform the packet from an MPLS
packet to a non-MPLS packet.  For an IP packet the change in ttl can
then be correctly committed before the last two output actions.

Two test cases are added to prevent future regressions.

Reported-by: Thomas Morin <thomas.morin@orange.com>
Suggested-by: Takashi YAMAMOTO <yamamoto@ovn.org>
Fixes: 8bfd0fdac ("Enhance userspace support for MPLS, for up to 3 labels.")
Fixes: 1b035ef20 ("mpls: Allow l3 and l4 actions to prior to a push_mpls action")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: YAMAMOTO Takashi <yamamoto@ovn.org>
											
										
										
											2016-12-01 14:05:24 -08:00
+								                    struct flow_wildcards *, bool clear_flow_L3);
-												Enhance userspace support for MPLS, for up to 3 labels.

This commit makes the userspace support for MPLS more complete.  Now
up to 3 labels are supported.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Simon Horman <horms@verge.net.au>

											
										
										
											2014-02-04 10:32:35 -08:00
+								bool flow_pop_mpls(struct flow *, int n, ovs_be16 eth_type,
 								                   struct flow_wildcards *);
 								void flow_set_mpls_label(struct flow *, int idx, ovs_be32 label);
 								void flow_set_mpls_ttl(struct flow *, int idx, uint8_t ttl);
 								void flow_set_mpls_tc(struct flow *, int idx, uint8_t tc);
 								void flow_set_mpls_bos(struct flow *, int idx, uint8_t stack);
 								void flow_set_mpls_lse(struct flow *, int idx, ovs_be32 lse);
-												User-Space MPLS actions and matches

This patch implements user-space datapath and non-datapath code
to match and use the datapath API set out in Leo Alterman's patch
"user-space datapath: Add basic MPLS support to kernel".

The resulting MPLS implementation supports:
* Pushing a single MPLS label
* Popping a single MPLS label
* Modifying an MPLS label using set-field or load actions
  that act on the label value, tc and bos bit.
* There is no support for manipulating the TTL;
  this is considered future work.

The single-level push pop limitation is implemented by processing
push, pop and set-field/load actions in order and discarding information
that would require multiple levels of push/pop to be supported.

e.g.
   push,push -> the first push is discarded
   pop,pop -> the first pop is discarded

This patch is based heavily on work by Ravi K.

Cc: Ravi K <rkerur@gmail.com>
Reviewed-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-25 16:22:07 +09:00
-												flow: Add some L7 payload data to most L4 protocols that accept it.

This makes traffic generated by flow_compose() look slightly more
realistic.  It requires lots of updates to tests, but at least the tests
themselves should be slightly more realistic too.

At the same time, add --l7 and --l7-len options to ofproto/trace to allow
users to specify the amount or contents of payloads that they want.

Suggested-by: Brad Cowie <brad@cowie.nz>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-26 14:36:05 -08:00
+								void flow_compose(struct dp_packet *, const struct flow *,
 								                  const void *l7, size_t l7_len);
 								void packet_expand(struct dp_packet *, const struct flow *, size_t size);
-												ofproto-dpif: Add -generate option to ofproto/trace command.

											
										
										
											2011-09-08 14:32:13 -07:00
-												flow: Export parse_ipv6_ext_hdrs().

This will be used by a future commit.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Flavio Leitner <fbl@sysclose.org>

											
										
										
											2015-11-15 22:07:25 -08:00
+								bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
-												flow: Enhance parse_ipv6_ext_hdrs.

Acked-by: Justin Pettit <jpettit@ovn.org>
Signed-off-by: Darrell Ball <dlu998@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-02-13 15:34:15 -08:00
+								                         uint8_t *nw_frag,
 								                         const struct ovs_16aligned_ip6_frag **frag_hdr);
-												nsh: add new flow key 'ttl'

The IETF NSH draft added a new field, ttl, to the NSH header; this patch
adds a new nsh key 'ttl' for it.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-11 13:24:01 +08:00
+								bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
-												dpif-netdev: retrieve flow directly from the flow mark

So that we could skip some very costly CPU operations, including but
not limited to miniflow_extract, emc lookup, dpcls lookup, etc. Thus,
performance could be greatly improved.

A PHY-PHY forwarding test with 1000 mega flows (udp,tp_src=1000-1999) and
1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows more than a
260% performance boost.

Note that though the heavy miniflow_extract is skipped, we still have
to do per-packet checking, because we have to check the tcp_flags.

Co-authored-by: Finn Christensen <fc@napatech.com>
Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
Signed-off-by: Finn Christensen <fc@napatech.com>
Co-authored-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Ian Stokes <ian.stokes@intel.com>

											
										
										
											2018-06-25 16:21:05 +03:00
+								uint16_t parse_tcp_flags(struct dp_packet *packet);
-												flow: Export parse_ipv6_ext_hdrs().

This will be used by a future commit.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Flavio Leitner <fbl@sysclose.org>

											
										
										
											2015-11-15 22:07:25 -08:00
-												meta-flow: Add 64-bit registers.

These 64-bit registers are intended to conform with the OpenFlow 1.5
draft specification.

EXT-244.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-28 09:50:37 -07:00
+								static inline uint64_t
 								flow_get_xreg(const struct flow *flow, int idx)
 								{
 								    return ((uint64_t) flow->regs[idx * 2] << 32) | flow->regs[idx * 2 + 1];
 								}
 								/* Stores 'value' into 64-bit extended register 'idx' of 'flow':
 								 * 'value >> 32' is assigned to regs[idx * 2] and 'value' itself (converted
 								 * to the element type) to regs[idx * 2 + 1]. */
 								static inline void
 								flow_set_xreg(struct flow *flow, int idx, uint64_t value)
 								{
 								    const int first = idx * 2;
 								    flow->regs[first] = value >> 32;
 								    flow->regs[first + 1] = value;
 								}
-												Introduce 128-bit xxregs.

These are needed to handle IPv6 addresses.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-10-31 04:45:28 -07:00
+								static inline ovs_u128
 								flow_get_xxreg(const struct flow *flow, int idx)
 								{
 								    ovs_u128 value;
 								    value.u64.hi = (uint64_t) flow->regs[idx * 4] << 32;
 								    value.u64.hi |= flow->regs[idx * 4 + 1];
 								    value.u64.lo = (uint64_t) flow->regs[idx * 4 + 2] << 32;
 								    value.u64.lo |= flow->regs[idx * 4 + 3];
 								    return value;
 								}
 								/* Stores 'value' into 128-bit extended register 'idx' of 'flow', spreading
 								 * it over regs[idx * 4] ... regs[idx * 4 + 3]: the 'hi' half fills the first
 								 * pair of entries and the 'lo' half the second pair. */
 								static inline void
 								flow_set_xxreg(struct flow *flow, int idx, ovs_u128 value)
 								{
 								    const int base = idx * 4;
 								    const uint64_t hi = value.u64.hi;
 								    const uint64_t lo = value.u64.lo;
 								    flow->regs[base] = hi >> 32;
 								    flow->regs[base + 1] = hi;
 								    flow->regs[base + 2] = lo >> 32;
 								    flow->regs[base + 3] = lo;
 								}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
 								/* Three-way comparison of flows 'a' and 'b'.  Returns the result of memcmp()
 								 * over all sizeof(struct flow) bytes, so the return value is zero exactly
 								 * when the two structures are byte-for-byte identical.
 								 *
 								 * NOTE(review): because every byte is compared, any padding inside struct
 								 * flow presumably has to be kept zeroed by callers -- confirm. */
+								static inline int
-												flow: Rename flow_compare() to flow_compare_3way().

I like the _3way suffix convention.  It makes the interpretation of the
return value clear.  We use it elsewhere in the tree, so use it here too.

There weren't any users of flow_compare() outside of flow.h, but there soon
will be.

											
										
										
											2011-10-25 16:33:38 -07:00
+								flow_compare_3way(const struct flow *a, const struct flow *b)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												flow: Take advantage of zero-padding in struct flow and flow_wildcards.

Since we know these bytes are always 0 in both structures, we can use
faster functions that only work with full words.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-06-18 15:12:57 -07:00
+								    return memcmp(a, b, sizeof *a);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
 								/* Returns true if flows 'a' and 'b' are equal, that is, if
 								 * flow_compare_3way() reports no difference between them. */
 								static inline bool
-												flow: Get rid of flow_t typedef.

When userspace and the kernel were using the same structure for flows,
flow_t was a useful way to indicate that a structure was really a userspace
flow instead of a kernel one, but now it's better to just write "struct
flow" for consistency, since OVS doesn't use typedefs for structs
elsewhere.

Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-09-03 11:30:02 -07:00
+								flow_equal(const struct flow *a, const struct flow *b)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												flow: Rename flow_compare() to flow_compare_3way().

I like the _3way suffix convention.  It makes the interpretation of the
return value clear.  We use it elsewhere in the tree, so use it here too.

There weren't any users of flow_compare() outside of flow.h, but there soon
will be.

											
										
										
											2011-10-25 16:33:38 -07:00
+								    return !flow_compare_3way(a, b);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
 								/* Returns a hash of 'flow' computed by hash_bytes64() over all
 								 * sizeof(struct flow) bytes, with 'basis' passed through as the hash basis.
 								 * The structure is handed to the hash function as an array of uint64_t. */
 								static inline size_t
-												flow: Get rid of flow_t typedef.

When userspace and the kernel were using the same structure for flows,
flow_t was a useful way to indicate that a structure was really a userspace
flow instead of a kernel one, but now it's better to just write "struct
flow" for consistency, since OVS doesn't use typedefs for structs
elsewhere.

Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-09-03 11:30:02 -07:00
+								flow_hash(const struct flow *flow, uint32_t basis)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												hash: New helper functions hash_bytes32() and hash_bytes64().

All of the callers of hash_words() and hash_words64() actually find it
easier to pass in the number of bytes instead of the number of 32-bit
or 64-bit words.  These new functions allow the callers to be a little
simpler.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-18 22:52:48 -08:00
+								    return hash_bytes64((const uint64_t *)flow, sizeof *flow, basis);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								static inline uint16_t
 								ofp_to_u16(ofp_port_t ofp_port)
 								{
 								    return (OVS_FORCE uint16_t) ofp_port;
 								}
 								/* Converts 'odp_port' to a plain uint32_t by means of an OVS_FORCE cast. */
 								static inline uint32_t
 								odp_to_u32(odp_port_t odp_port)
 								{
 								    uint32_t raw = (OVS_FORCE uint32_t) odp_port;
 								    return raw;
 								}
 								/* Converts 'ofp11_port' to a plain uint32_t by means of an OVS_FORCE cast. */
 								static inline uint32_t
 								ofp11_to_u32(ofp11_port_t ofp11_port)
 								{
 								    uint32_t raw = (OVS_FORCE uint32_t) ofp11_port;
 								    return raw;
 								}
 								/* Converts the plain integer 'port' to an ofp_port_t via OFP_PORT_C(). */
 								static inline ofp_port_t
 								u16_to_ofp(uint16_t port)
 								{
 								    ofp_port_t ofp_port = OFP_PORT_C(port);
 								    return ofp_port;
 								}
 								/* Converts the plain integer 'port' to an odp_port_t via ODP_PORT_C(). */
 								static inline odp_port_t
 								u32_to_odp(uint32_t port)
 								{
 								    odp_port_t odp_port = ODP_PORT_C(port);
 								    return odp_port;
 								}
 								/* Converts the plain integer 'port' to an ofp11_port_t via OFP11_PORT_C(). */
 								static inline ofp11_port_t
 								u32_to_ofp11(uint32_t port)
 								{
 								    ofp11_port_t ofp11_port = OFP11_PORT_C(port);
 								    return ofp11_port;
 								}
-												flow: New function hash_ofp_port() and hash_ofp_port().

These functions are used so often that having an easy-to-read
helper is worth it.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-22 10:33:27 -07:00
+								static inline uint32_t
 								hash_ofp_port(ofp_port_t ofp_port)
 								{
 								    return hash_int(ofp_to_u16(ofp_port), 0);
 								}
 								/* Returns hash_int() of the integer value of 'odp_port', with basis 0. */
 								static inline uint32_t
 								hash_odp_port(odp_port_t odp_port)
 								{
 								    uint32_t raw = odp_to_u32(odp_port);
 								    return hash_int(raw, 0);
 								}
-												flow: Simplify many functions for working with flows and wildcards.

Now that "struct flow" and "struct flow_wildcards" have the same simple
and uniform structure, it's easy to handle common operations by just
iterating over the bits inside them.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 13:43:18 -07:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis);
-												lib: Move l4 flow hash to the flow library.

This commit moves hash_symmetric_l4() to the flow library so that
it may be used in future patches.

											
										
										
											2011-02-01 18:50:25 -08:00
+								uint32_t flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis);
-												userspace datapath: Add OVS_HASH_L4_SYMMETRIC dp_hash algorithm

This commit implements a new dp_hash algorithm OVS_HASH_L4_SYMMETRIC in
the netdev datapath. It will be used as default hash algorithm for the
dp_hash-based select groups in a subsequent commit to maintain
compatibility with the symmetry property of the current default hash
selection method.

A new dpif_backer_support field 'max_hash_alg' is introduced to reflect
the highest hash algorithm a datapath supports in the dp_hash action.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Nitin Katiyar <nitin.katiyar@ericsson.com>
Co-authored-by: Nitin Katiyar <nitin.katiyar@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-05-24 17:27:59 +02:00
+								uint32_t flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis);
-												hash: Add symmetric L3/L4 hash functions for multipath, bundle hashing.

Signed-off-by: Jeroen van Bemmel <jvb127@gmail.com>
[blp@nicira.com made code style fixes, expanded documentation]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-06 12:58:24 -05:00
+								uint32_t flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
 								                         bool inc_udp_ports );
-												bundle: add symmetric_l3 hash method for multipath

Add a symmetric_l3 hash method that uses both network destination
address and network source address.

VMware-BZ: #2112940
Signed-off-by: Martin Xu <martinxu9.ovs@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-10-02 09:40:09 -07:00
+								uint32_t flow_hash_symmetric_l3(const struct flow *flow, uint32_t basis);
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												More accurate wildcarding and fragment handling.

This patch gets rid of the need for having explicit padding in struct
flow as new fields are being added.  flow_wildcards_init_exact(), which
used to set bits in both compiler generated and explicit padding, is
removed.  match_wc_init() is now used instead, which generates the mask
based on a given flow, setting bits only in fields which make sense.

Places where random bits were placed in struct flow have been changed to
only set random bits on fields that are significant in the given context.
This avoids setting padding bits.

- lib/flow:
  - Properly initialize struct flow also in places we used to zero out
    padding before.
  - Add flow_random_hash_fields() used for testing.
  - Remove flow_wildcards_init_exact() to avoid initializing
     masks where compiler generated padding has bits set.
- lib/match.c match_wc_init(): Wildcard transport layer fields for later
  fragments, remove match_init_exact(), which used
  flow_wildcards_init_exact().
- tests/test-flows.c: use match_wc_init() instead of match_init_exact()
- tests/flowgen.pl: generate more accurate packets and flows when
  fragmenting, mark unavailable fields as wildcarded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-17 14:28:20 -07:00
+								/* Initialize a flow with random fields that matter for nx_hash_fields. */
 								void flow_random_hash_fields(struct flow *);
-												flow: Only un-wildcard relevant IP headers.

When determining the fields to un-wildcard, we need to be careful
about only un-wildcarding fields that are relevant.  Also, we
didn't properly handle IPv6 addresses.

Signed-off-by: Justin Pettit <jpettit@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-06-26 16:37:16 -07:00
+								void flow_mask_hash_fields(const struct flow *, struct flow_wildcards *,
 								                           enum nx_hash_fields);
-												nicira-ext: Generalize nx_mp_fields into nx_hash_fields.

Future patches will use nx_hash_fields for non-multipath related
actions.  This patch renames nx_mp_fields and creates a new
flow_hash_fields() function.

											
										
										
											2011-07-13 16:20:24 -07:00
+								uint32_t flow_hash_fields(const struct flow *, enum nx_hash_fields,
 								                          uint16_t basis);
 								const char *flow_hash_fields_to_str(enum nx_hash_fields);
 								bool flow_hash_fields_valid(enum nx_hash_fields);
-												Better abstract wildcards for Ethernet destination field.

I think that this makes nx-match.c a little easier to read.  The new
functions added here will have more users in an upcoming patch.

											
										
										
											2011-06-06 14:21:40 -07:00
-												flow: Add new wildcard functions.

Rename the function flow_wildcards_combine() to flow_wildcards_and().
Add new flow_wildcards_or() and flow_hash_in_wildcards() functions.
These will be useful in a future patch.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2013-06-10 22:48:58 -07:00
+								uint32_t flow_hash_in_wildcards(const struct flow *,
 								                                const struct flow_wildcards *,
 								                                uint32_t basis);
-												flow: Simplify many functions for working with flows and wildcards.

Now that "struct flow" and "struct flow_wildcards" have the same simple
and uniform structure, it's easy to handle common operations by just
iterating over the bits inside them.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 13:43:18 -07:00
+								bool flow_equal_except(const struct flow *a, const struct flow *b,
 								                       const struct flow_wildcards *);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Bitmap for flow values.  For each 1-bit the corresponding flow value is
 								 * explicitly specified, other values are zeroes.
 								 *
 								 * map_t must be wide enough to hold any member of struct flow. */
 								typedef unsigned long long map_t;
 								#define MAP_T_BITS (sizeof(map_t) * CHAR_BIT)
 								#define MAP_1 (map_t)1
 								#define MAP_MAX TYPE_MAXIMUM(map_t)
 								#define MAP_IS_SET(MAP, IDX) ((MAP) & (MAP_1 << (IDX)))
 								/* Iterate through the indices of all 1-bits in 'MAP'. */
 								#define MAP_FOR_EACH_INDEX(IDX, MAP)            \
 								    ULLONG_FOR_EACH_1(IDX, MAP)
 								/* Number of map_t units needed to hold FLOW_U64S bits, rounded up to a whole
 								 * unit (presumably one bit per 64-bit word of struct flow -- confirm against
 								 * the definition of FLOW_U64S). */
 								#define FLOWMAP_UNITS DIV_ROUND_UP(FLOW_U64S, MAP_T_BITS)
 								/* A bitmap made of FLOWMAP_UNITS consecutive map_t units. */
 								struct flowmap {
 								    map_t bits[FLOWMAP_UNITS];
 								};
 								/* Initializer for a struct flowmap with every bit clear. */
 								#define FLOWMAP_EMPTY_INITIALIZER { { 0 } }
 								static inline void flowmap_init(struct flowmap *);
 								static inline bool flowmap_equal(struct flowmap, struct flowmap);
 								static inline bool flowmap_is_set(const struct flowmap *, size_t idx);
 								static inline bool flowmap_are_set(const struct flowmap *, size_t idx,
 								                                   unsigned int n_bits);
 								static inline void flowmap_set(struct flowmap *, size_t idx,
 								                               unsigned int n_bits);
 								static inline void flowmap_clear(struct flowmap *, size_t idx,
 								                                 unsigned int n_bits);
 								static inline struct flowmap flowmap_or(struct flowmap, struct flowmap);
 								static inline struct flowmap flowmap_and(struct flowmap, struct flowmap);
 								static inline bool flowmap_is_empty(struct flowmap);
 								static inline unsigned int flowmap_n_1bits(struct flowmap);
 								/* Calls flowmap_are_set() on flowmap pointer 'FM' for the u64 range that
 								 * struct flow member 'FIELD' occupies (FLOW_U64_OFFSET(FIELD) onwards,
 								 * FLOW_U64_SIZE(FIELD) bits). */
 								#define FLOWMAP_HAS_FIELD(FM, FIELD)                                    \
 								    flowmap_are_set(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD))
 								/* Calls flowmap_set() on flowmap pointer 'FM' for the u64 range that struct
 								 * flow member 'FIELD' occupies. */
 								#define FLOWMAP_SET(FM, FIELD)                                      \
 								    flowmap_set(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD))
 								/* Like FLOWMAP_SET(), but the number of bits is derived from 'SIZE' divided
 								 * by sizeof(uint64_t), rounded up, rather than from the size of 'FIELD'. */
 								#define FLOWMAP_SET__(FM, FIELD, SIZE)                  \
 								    flowmap_set(FM, FLOW_U64_OFFSET(FIELD),             \
 								                DIV_ROUND_UP(SIZE, sizeof(uint64_t)))
 								/* Calls flowmap_clear() on flowmap pointer 'FM' for the u64 range that
 								 * struct flow member 'FIELD' occupies.
 								 *
 								 * XXX: Only works for full 64-bit units.  The build-time assertions require
 								 * FLOW_U64_OFFREM(FIELD) to be zero and the size of 'FIELD' to be a multiple
 								 * of sizeof(uint64_t).
 								 *
 								 * The expansion is wrapped in do { } while (0) so that it forms a single
 								 * statement and stays intact as the body of an unbraced 'if' or loop. */
 								#define FLOWMAP_CLEAR(FM, FIELD)                                        \
 								    do {                                                                \
 								        BUILD_ASSERT_DECL(FLOW_U64_OFFREM(FIELD) == 0);                 \
 								        BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->FIELD)             \
 								                          % sizeof(uint64_t) == 0);                     \
 								        flowmap_clear(FM, FLOW_U64_OFFSET(FIELD),                       \
 								                      FLOW_U64_SIZE(FIELD));                            \
 								    } while (0)
 								/* Iterate 'UNIT' over every unit index of a struct flowmap, that is, from 0
 								 * up to but not including FLOWMAP_UNITS.  'UNIT' must be an assignable
 								 * integer variable declared by the caller. */
 								#define FLOWMAP_FOR_EACH_UNIT(UNIT)                     \
 								    for ((UNIT) = 0; (UNIT) < FLOWMAP_UNITS; (UNIT)++)
 								/* Iterate through all map units in 'FLOWMAP', assigning the value of each
 								 * unit to 'MAP' in turn. */
 								#define FLOWMAP_FOR_EACH_MAP(MAP, FLOWMAP)                              \
 								    for (size_t unit__ = 0;                                       \
 								         unit__ < FLOWMAP_UNITS && ((MAP) = (FLOWMAP).bits[unit__], true); \
 								         unit__++)
 								/* Iteration state for FLOWMAP_FOR_EACH_INDEX(); only forward-declared at
 								 * this point. */
 								struct flowmap_aux;
 								static inline bool flowmap_next_index(struct flowmap_aux *, size_t *idx);
 								/* Initializer for a struct flowmap_aux that starts at unit 0 of 'FLOWMAP'. */
 								#define FLOWMAP_AUX_INITIALIZER(FLOWMAP) { .unit = 0, .map = (FLOWMAP) }
 								/* Iterate through all struct flow u64 indices specified by 'MAP'.  This is a
 								 * slower but easier version of the FLOWMAP_FOR_EACH_MAP() &
 								 * MAP_FOR_EACH_INDEX() combination. */
 								#define FLOWMAP_FOR_EACH_INDEX(IDX, MAP)                            \
 								    for (struct flowmap_aux aux__ = FLOWMAP_AUX_INITIALIZER(MAP);   \
 								         flowmap_next_index(&aux__, &(IDX));)
 								/* Flowmap inline implementations. */
 								static inline void
 								flowmap_init(struct flowmap *fm)
 								{
 								    memset(fm, 0, sizeof *fm);
 								}
 								/* Returns true if flowmaps 'a' and 'b' are byte-for-byte identical. */
 								static inline bool
 								flowmap_equal(struct flowmap a, struct flowmap b)
 								{
 								    int diff = memcmp(&a, &b, sizeof a);
 								    return diff == 0;
 								}
 								static inline bool
 								flowmap_is_set(const struct flowmap *fm, size_t idx)
 								{
 								    return (fm->bits[idx / MAP_T_BITS] & (MAP_1 << (idx % MAP_T_BITS))) != 0;
 								}
 								/* Returns 'true' if any of the 'n_bits' bits starting at 'idx' are set in
 								 * 'fm'.  'n_bits' can be at most MAP_T_BITS. */
 								static inline bool
 								flowmap_are_set(const struct flowmap *fm, size_t idx, unsigned int n_bits)
 								{
 								    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
 								    size_t unit = idx / MAP_T_BITS;
 								    idx %= MAP_T_BITS;
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    if (fm->bits[unit] & (n_bits_mask << idx)) {
 								        return true;
 								    }
-												flow: Workaround for GCC false-positive compilation error.

Without an explicit bounds check GCC 4.9 issues an array out of bounds
error.  This patch adds explicit checks which will however be
optimized away as the relevant parameters are compile-time constants.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-08-27 10:48:03 -07:00
+								    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
 								     * false-positive array out of bounds error by GCC 4.9. */
 								    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        /* Check the remaining bits from the next unit. */
 								        return fm->bits[unit + 1] & (n_bits_mask >> (MAP_T_BITS - idx));
 								    }
 								    return false;
 								}
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Set the 'n_bits' consecutive bits in 'fm', starting at bit 'idx'.
 								 * 'n_bits' can be at most MAP_T_BITS. */
 								static inline void
 								flowmap_set(struct flowmap *fm, size_t idx, unsigned int n_bits)
 								{
 								    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
 								    size_t unit = idx / MAP_T_BITS;
 								    idx %= MAP_T_BITS;
 								    fm->bits[unit] |= n_bits_mask << idx;
-												flow: Workaround for GCC false-positive compilation error.

Without an explicit bounds check GCC 4.9 issues an array out of bounds
error.  This patch adds explicit checks which will however be
optimized away as the relevant parameters are compile-time constants.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-08-27 10:48:03 -07:00
+								    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
 								     * false-positive array out of bounds error by GCC 4.9. */
 								    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        /* 'MAP_T_BITS - idx' bits were set on 'unit', set the remaining
 								         * bits from the next unit. */
 								        fm->bits[unit + 1] |= n_bits_mask >> (MAP_T_BITS - idx);
 								    }
 								}
 								/* Clears the 'n_bits' consecutive bits in 'fm', starting at bit 'idx'.
 								 * 'n_bits' can be at most MAP_T_BITS. */
 								static inline void
 								flowmap_clear(struct flowmap *fm, size_t idx, unsigned int n_bits)
 								{
 								    map_t n_bits_mask = (MAP_1 << n_bits) - 1;
 								    size_t unit = idx / MAP_T_BITS;
 								    idx %= MAP_T_BITS;
 								    fm->bits[unit] &= ~(n_bits_mask << idx);
-												flow: Workaround for GCC false-positive compilation error.

Without an explicit bounds check GCC 4.9 issues an array out of bounds
error.  This patch adds explicit checks which will however be
optimized away as the relevant parameters are compile-time constants.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-08-27 10:48:03 -07:00
+								    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
 								     * false-positive array out of bounds error by GCC 4.9. */
 								    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        /* 'MAP_T_BITS - idx' bits were cleared on 'unit', clear the
 								         * remaining bits from the next unit. */
 								        fm->bits[unit + 1] &= ~(n_bits_mask >> (MAP_T_BITS - idx));
 								    }
 								}
 								/* OR the bits in the flowmaps. */
 								static inline struct flowmap
-												flow: Fix MSVC compile errors.

This fixes some MSVC build errors introduced by commit 74ff3298c
(userspace: Define and use struct eth_addr.)

MSVC doesn't like the change in 'const' between function declaration and
definition: it reports "formal parameter 2 different from declaration" for
each of the functions in flow.h corrected by this (commit.  I think it's
technically wrong about that, standards-wise.)

MSVC doesn't like an empty-brace initializer.  (I think it's technically
right about that, standards-wise.)

This commit attempts to fix both problems, but I have not tested it with
MSVC.

CC: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Nithin Raju <nithin@vmware.com>
Tested-by: Nithin Raju <nithin@vmware.com>

											
										
										
											2015-08-30 07:40:31 -07:00
+								flowmap_or(struct flowmap a, struct flowmap b)
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								{
 								    struct flowmap map;
 								    size_t unit;
 								    FLOWMAP_FOR_EACH_UNIT (unit) {
 								        map.bits[unit] = a.bits[unit] | b.bits[unit];
 								    }
 								    return map;
 								}
 								/* AND the bits in the flowmaps. */
 								static inline struct flowmap
-												flow: Fix MSVC compile errors.

This fixes some MSVC build errors introduced by commit 74ff3298c
(userspace: Define and use struct eth_addr.)

MSVC doesn't like the change in 'const' between function declaration and
definition: it reports "formal parameter 2 different from declaration" for
each of the functions in flow.h corrected by this (commit.  I think it's
technically wrong about that, standards-wise.)

MSVC doesn't like an empty-brace initializer.  (I think it's technically
right about that, standards-wise.)

This commit attempts to fix both problems, but I have not tested it with
MSVC.

CC: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Nithin Raju <nithin@vmware.com>
Tested-by: Nithin Raju <nithin@vmware.com>

											
										
										
											2015-08-30 07:40:31 -07:00
+								flowmap_and(struct flowmap a, struct flowmap b)
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								{
 								    struct flowmap map;
 								    size_t unit;
 								    FLOWMAP_FOR_EACH_UNIT (unit) {
 								        map.bits[unit] = a.bits[unit] & b.bits[unit];
 								    }
 								    return map;
 								}
 								static inline bool
-												flow: Fix MSVC compile errors.

This fixes some MSVC build errors introduced by commit 74ff3298c
(userspace: Define and use struct eth_addr.)

MSVC doesn't like the change in 'const' between function declaration and
definition: it reports "formal parameter 2 different from declaration" for
each of the functions in flow.h corrected by this (commit.  I think it's
technically wrong about that, standards-wise.)

MSVC doesn't like an empty-brace initializer.  (I think it's technically
right about that, standards-wise.)

This commit attempts to fix both problems, but I have not tested it with
MSVC.

CC: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Nithin Raju <nithin@vmware.com>
Tested-by: Nithin Raju <nithin@vmware.com>

											
										
										
											2015-08-30 07:40:31 -07:00
+								flowmap_is_empty(struct flowmap fm)
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								{
 								    map_t map;
 								    FLOWMAP_FOR_EACH_MAP (map, fm) {
 								        if (map) {
 								            return false;
 								        }
 								    }
 								    return true;
 								}
 								static inline unsigned int
 								flowmap_n_1bits(struct flowmap fm)
 								{
 								    unsigned int n_1bits = 0;
 								    size_t unit;
 								    FLOWMAP_FOR_EACH_UNIT (unit) {
 								        n_1bits += count_1bits(fm.bits[unit]);
 								    }
 								    return n_1bits;
 								}
 								/* Iteration state used by flowmap_next_index(). */
 								struct flowmap_aux {
 								    size_t unit;        /* Index into 'map.bits' of the unit being scanned. */
 								    struct flowmap map; /* Working copy; flowmap_next_index() clears each
 								                         * 1-bit here as it reports that bit's index. */
 								};
 								/* Advances the iteration described by 'aux'.
 								 *
 								 * If a 1-bit remains in 'aux->map', stores its index (counted across
 								 * units, MAP_T_BITS per unit) in '*idx', clears that bit in 'aux->map',
 								 * and returns true.  Otherwise returns false and leaves '*idx' untouched.
 								 *
 								 * 'aux->unit' is checked against FLOWMAP_UNITS before 'aux->map.bits' is
 								 * read, so calling this again after it has returned false is harmless
 								 * and does not read past the end of the array. */
 								static inline bool
 								flowmap_next_index(struct flowmap_aux *aux, size_t *idx)
 								{
 								    while (aux->unit < FLOWMAP_UNITS) {
 								        map_t *map = &aux->map.bits[aux->unit];
 								        if (*map) {
 								            /* Report the lowest remaining 1-bit, then consume it so the
 								             * next call moves on. */
 								            *idx = aux->unit * MAP_T_BITS + raw_ctz(*map);
 								            *map = zero_rightmost_1bit(*map);
 								            return true;
 								        }
 								        /* Current unit is exhausted, move to the next one. */
 								        aux->unit++;
 								    }
 								    return false;
 								}
 								/* Compressed flow. */
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								/* A sparse representation of a "struct flow".
 								 *
 								 * A "struct flow" is fairly large and tends to be mostly zeros.  Sparse
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								 * representation has two advantages.  First, it saves memory and, more
 								 * importantly, minimizes the number of accessed cache lines.  Second, it saves
 								 * time when the goal is to iterate over only the nonzero parts of the struct.
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								 *
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								 * The map member holds one bit for each uint64_t in a "struct flow".  Each
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								 * 0-bit indicates that the corresponding uint64_t is zero, each 1-bit that it
-												lib/flow: Maintain miniflow offline values explicitly.

This allows use of miniflows that have all of their values inline.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								 * *may* be nonzero (see below how this applies to minimasks).
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								 *
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								 * The values indicated by 'map' always follow the miniflow in memory.  The
 								 * user of the miniflow is responsible for always having enough storage after
 								 * the struct miniflow corresponding to the number of 1-bits in maps.
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								 *
-												lib/flow: Maintain miniflow offline values explicitly.

This allows use of miniflows that have all of their values inline.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								 * Elements in values array are allowed to be zero.  This is useful for "struct
-												match: New function minimatch_matches_flow().

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-02-06 16:13:19 -08:00
+								 * minimatch", for which ensuring that the miniflow and minimask members have
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								 * same maps allows optimization.  This allowance applies only to a miniflow
-												flow: Make compile with MSVC.

MSVC does not like zero sized arrays in structs.  Hence, remove the
'values' member from struct miniflow and add back the getters
miniflow_values() and miniflow_get_values().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-16 17:42:24 -07:00
+								 * that is not a mask.  That is, a minimask may NOT have zero elements in its
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								 * values.
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								 *
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								 * A miniflow is always dynamically allocated so that the maps are followed by
 								 * at least as many elements as there are 1-bits in maps. */
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								struct miniflow {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    struct flowmap map;
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    /* Followed by:
 								     *     uint64_t values[n];
 								     * where 'n' is miniflow_n_values(miniflow). */
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								};
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								BUILD_ASSERT_DECL(sizeof(struct miniflow) % sizeof(uint64_t) == 0);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								#define MINIFLOW_VALUES_SIZE(COUNT) ((COUNT) * sizeof(uint64_t))
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
-												flow: Make compile with MSVC.

MSVC does not like zero sized arrays in structs.  Hence, remove the
'values' member from struct miniflow and add back the getters
miniflow_values() and miniflow_get_values().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-16 17:42:24 -07:00
+								static inline uint64_t *miniflow_values(struct miniflow *mf)
 								{
 								    /* 'values' is not a member of struct miniflow; the 64-bit units are
 								     * laid out directly after the struct, so 'mf + 1' is the address of
 								     * the first one. */
 								    return (uint64_t *)(mf + 1);
 								}
 								static inline const uint64_t *miniflow_get_values(const struct miniflow *mf)
 								{
 								    /* Read-only accessor: returns the address just past the struct
 								     * ('mf + 1'), where the 64-bit units are stored, as a const pointer. */
 								    return (const uint64_t *)(mf + 1);
 								}
-												lib/flow: Introduce miniflow_extract().

miniflow_extract() extracts packet headers directly to a miniflow,
which is a compressed form of the struct flow.  This does not require
a large struct to be cleared to begin with, and accesses less memory.
These performance benefits should allow this to be used in the DPDK
datapath.

miniflow_extract() takes a miniflow as an input/output parameter.  On
input the buffer for values to be extracted must be properly
initialized.  On output the map contains ones for all the fields that
have been extracted.

Some struct flow fields are reordered to make miniflow_extract()
progress in the logical order.

Some explicit "inline" keywords are necessary for GCC to optimize this
properly.  Also, macros are used for the same reason instead of inline
functions for pushing data to the miniflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								struct pkt_metadata;
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								/* 'dst' must be followed by buffer space for FLOW_U64S 64-bit units.
 								 * 'dst->map' is ignored on input and set on output to indicate which fields
 								 * were extracted. */
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers.  That complicates ofpbuf.  By making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use cases.
This avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
-												match: Single malloc minimatch.

Allocate the miniflow and minimask in struct minimatch at once, so
that they are consecutive in memory.  This halves the number of
allocations, and allows smaller minimatches to share the same cache
line.

After this, a minimatch has one heap allocation for all its data.
Previously it had either none (when data was small enough to fit in
struct miniflow's inline buffer), or two (when the inline buffer was
insufficient).  Hopefully always having one performs almost the same
as none or two, on average.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								void miniflow_map_init(struct miniflow *, const struct flow *);
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								void flow_wc_map(const struct flow *, struct flowmap *);
-												match: Single malloc minimatch.

Allocate the miniflow and minimask in struct minimatch at once, so
that they are consecutive in memory.  This halves the number of
allocations, and allows smaller minimatches to share the same cache
line.

After this, a minimatch has one heap allocation for all its data.
Previously it had either none (when data was small enough to fit in
struct miniflow's inline buffer), or two (when the inline buffer was
insufficient).  Hopefully always having one performs almost the same
as none or two, on average.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
 								                      const struct miniflow *src);
 								void miniflow_init(struct miniflow *, const struct flow *);
-												flow: Eliminate miniflow_clone() and minimask_clone().

miniflow_clone() and minimask_clone() are no longer used, remove them
from the API.

Now that miniflow data is always inlined, it makes sense to rename
miniflow_clone_inline() miniflow_clone().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								void miniflow_clone(struct miniflow *, const struct miniflow *,
 								                    size_t n_values);
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								struct miniflow * miniflow_create(const struct flow *);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								void miniflow_expand(const struct miniflow *, struct flow *);
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								static inline uint64_t flow_u64_value(const struct flow *flow, size_t index)
-												lib/classifier: Optimize megaflows for single rule case.

When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.

This is a special case of the more general algorithm, where it is
sufficient to match on enough bits to separate the packet from all
rules of higher priority than the matched rule.  For a miss, that would
be all the rules.  Implementing this is expensive for more than a few
rules.  This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								{
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    return ((uint64_t *)flow)[index];
-												lib/classifier: Optimize megaflows for single rule case.

When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.

This is a special case of the more general algorithm, where it is
sufficient to match on enough bits to separate the packet from all
rules of higher priority than the matched rule.  For a miss, that would
be all the rules.  Implementing this is expensive for more than a few
rules.  This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								}
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								static inline uint64_t *flow_u64_lvalue(struct flow *flow, size_t index)
-												lib/classifier: Optimize megaflows for single rule case.

When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.

This is a special case of the more general algorithm, where it is
sufficient to match on enough bits to separate the packet from all
rules of higher priority than the matched rule.  For a miss, that would
be all the rules.  Implementing this is expensive for more than a few
rules.  This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								{
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    return &((uint64_t *)flow)[index];
 								}
 								static inline size_t
 								miniflow_n_values(const struct miniflow *flow)
 								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return flowmap_n_1bits(flow->map);
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								}
 								struct flow_for_each_in_maps_aux {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    const struct flow *flow;
 								    struct flowmap_aux map_aux;
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								};
-												lib/flow: Use C99 declaration in for statement.

C99 declarations within code are allowed now.  Change the
FLOW_FOR_EACH_IN_MAP to use loop variable within the for statement.
This makes this macro more generally useful.

The loop variable name is suffixed with two underscores with the
intention that there would be a low likelihood of collision with any
of the macro parameters.

Also fix the return type of flow_get_next_in_map().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-28 16:56:29 -07:00
+								static inline bool
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								flow_values_get_next_in_maps(struct flow_for_each_in_maps_aux *aux,
 								                             uint64_t *value)
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    size_t idx;
 								    if (flowmap_next_index(&aux->map_aux, &idx)) {
 								        *value = flow_u64_value(aux->flow, idx);
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								        return true;
 								    }
 								    return false;
 								}
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Iterate through all flow u64 values specified by 'MAPS'. */
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								#define FLOW_FOR_EACH_IN_MAPS(VALUE, FLOW, MAPS)            \
 								    for (struct flow_for_each_in_maps_aux aux__             \
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								             = { (FLOW), FLOWMAP_AUX_INITIALIZER(MAPS) };   \
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								         flow_values_get_next_in_maps(&aux__, &(VALUE));)
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
-												flow: Clean up MINIFLOW_FOR_EACH_IN_MAP.

It seemed awkward to have declarations outside the for loop.

This may also be a little faster because it avoids some calls to
count_1bits().  The idea for that change is due to Jarno Rajahalme
<jrajahalme@nicira.com>.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-07 12:59:14 -07:00
+								struct mf_for_each_in_map_aux {
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								    size_t unit;             /* Current 64-bit unit of the flowmaps
 								                                being processed. */
 								    struct flowmap fmap;     /* Remaining 1-bits corresponding to the
 								                                64-bit words in ‘values’ */
 								    struct flowmap map;      /* Remaining 1-bits corresponding to the
 								                                64-bit words of interest. */
 								    const uint64_t *values;  /* 64-bit words corresponding to the
 								                                1-bits in ‘fmap’. */
-												flow: Clean up MINIFLOW_FOR_EACH_IN_MAP.

It seemed awkward to have declarations outside the for loop.

This may also be a little faster because it avoids some calls to
count_1bits().  The idea for that change is due to Jarno Rajahalme
<jrajahalme@nicira.com>.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-07 12:59:14 -07:00
+								};
-												classifier: Support miniflow as a key.

Support struct miniflow as a key for datapath flow lookup.

The new classifier interface classifier_lookup_miniflow_first() takes
a miniflow as a key and stops at the first match with no regard to
flow priorities.  This works only if the classifier has no
conflicting rules (as is the case with the userspace datapath
classifier).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								/* Get the data from ‘aux->values’ corresponding to the next lowest 1-bit
 								 * in ‘aux->map’, given that ‘aux->values’ points to an array of 64-bit
 								 * words corresponding to the 1-bits in ‘aux->fmap’, starting from the
 								 * rightmost 1-bit.
 								 *
 								 * Returns ‘true’ if the traversal is incomplete, ‘false’ otherwise.
 								 * ‘aux’ is prepared for the next iteration after each call.
 								 *
 								 * This is used to traverse through, for example, the values in a miniflow
 								 * representation of a flow key selected by non-zero 64-bit words in a
 								 * corresponding subtable mask. */
-												flow: Clean up MINIFLOW_FOR_EACH_IN_MAP.

It seemed awkward to have declarations outside the for loop.

This may also be a little faster because it avoids some calls to
count_1bits().  The idea for that change is due to Jarno Rajahalme
<jrajahalme@nicira.com>.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-07 12:59:14 -07:00
+								static inline bool
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								mf_get_next_in_map(struct mf_for_each_in_map_aux *aux,
 								                   uint64_t *value)
-												flow: Clean up MINIFLOW_FOR_EACH_IN_MAP.

It seemed awkward to have declarations outside the for loop.

This may also be a little faster because it avoids some calls to
count_1bits().  The idea for that change is due to Jarno Rajahalme
<jrajahalme@nicira.com>.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-07 12:59:14 -07:00
+								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    map_t *map, *fmap;
 								    map_t rm1bit;
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								    /* Skip empty map units. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    while (OVS_UNLIKELY(!*(map = &aux->map.bits[aux->unit]))) {
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								        /* Skip remaining data in the current unit before advancing
 								         * to the next. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        aux->values += count_1bits(aux->fmap.bits[aux->unit]);
 								        if (++aux->unit == FLOWMAP_UNITS) {
 								            return false;
 								        }
 								    }
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    rm1bit = rightmost_1bit(*map);
 								    *map -= rm1bit;
 								    fmap = &aux->fmap.bits[aux->unit];
-												flow: Clean up MINIFLOW_FOR_EACH_IN_MAP.

It seemed awkward to have declarations outside the for loop.

This may also be a little faster because it avoids some calls to
count_1bits().  The idea for that change is due to Jarno Rajahalme
<jrajahalme@nicira.com>.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-07 12:59:14 -07:00
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								    /* If the rightmost 1-bit found from the current unit in ‘aux->map’
 								     * (‘rm1bit’) is also present in ‘aux->fmap’, store the corresponding
 								     * value from ‘aux->values’ to ‘*value’, otherwise store 0. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    if (OVS_LIKELY(*fmap & rm1bit)) {
-												flow: Add comments to mf_get_next_in_map().

This patch adds comments to mf_get_next_in_map() to make it more
comprehensible.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:42 +01:00
+								        /* Skip all 64-bit words in ‘values’ preceding the one corresponding
 								         * to ‘rm1bit’. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        map_t trash = *fmap & (rm1bit - 1);
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
-												flow: Skip invoking expensive count_1bits() with zero input.

This patch checks if trash is non-zero and only then resets the flowmap
bit and increments the pointer by the number of set bits found in trash.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-18 17:31:41 +01:00
+								        /* Avoid resetting 'fmap' and calling count_1bits() when trash is
 								         * zero. */
 								        if (trash) {
 								            *fmap -= trash;
 								            aux->values += count_1bits(trash);
 								        }
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        *value = *aux->values;
 								    } else {
 								        *value = 0;
-												classifier: Support miniflow as a key.

Support struct miniflow as a key for datapath flow lookup.

The new classifier interface classifier_lookup_miniflow_first() takes
a miniflow as a key and stops at the first match with no regard to
flow prioritites.  This works only if the classifier has no
conflicting rules (as is the case with the userspace datapath
classifier).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return true;
-												classifier: Support miniflow as a key.

Support struct miniflow as a key for datapath flow lookup.

The new classifier interface classifier_lookup_miniflow_first() takes
a miniflow as a key and stops at the first match with no regard to
flow priorities.  This works only if the classifier has no
conflicting rules (as is the case with the userspace datapath
classifier).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Iterate through miniflow u64 values specified by 'FLOWMAP'. */
 								#define MINIFLOW_FOR_EACH_IN_FLOWMAP(VALUE, FLOW, FLOWMAP)          \
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    for (struct mf_for_each_in_map_aux aux__ =                      \
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        { 0, (FLOW)->map, (FLOWMAP), miniflow_get_values(FLOW) };   \
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								         mf_get_next_in_map(&aux__, &(VALUE));)
-												classifier: Support miniflow as a key.

Support struct miniflow as a key for datapath flow lookup.

The new classifier interface classifier_lookup_miniflow_first() takes
a miniflow as a key and stops at the first match with no regard to
flow priorities.  This works only if the classifier has no
conflicting rules (as is the case with the userspace datapath
classifier).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* This can be used when it is known that 'idx' is set in 'map'. */
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								static inline const uint64_t *
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								miniflow_values_get__(const uint64_t *values, map_t map, size_t idx)
-												flow.c: Improve minimask_equal() and minimask_has_extra().

minimask_equal() and minimask_has_extra() can benefit from the
fact that minimasks have no zero data.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-11-26 15:17:26 -08:00
+								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return values + count_1bits(map & ((MAP_1 << idx) - 1));
-												flow.c: Improve minimask_equal() and minimask_has_extra().

minimask_equal() and minimask_has_extra() can benefit from the
fact that minimasks have no zero data.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-11-26 15:17:26 -08:00
+								}
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								/* This can be used when it is known that 'idx' is set in
-												flow: Define miniflow_get__() to simplify code.

miniflow_get__() can be used when it is known that the miniflow stores
a value at the given index.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-11-26 15:17:26 -08:00
+								 * the map of 'mf'. */
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								static inline const uint64_t *
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								miniflow_get__(const struct miniflow *mf, size_t idx)
 								{
 								    /* 'idx' is a bit position counted across the whole array of map
 								     * units in 'mf->map.bits'.  Start from the values array returned by
 								     * miniflow_get_values() and from the first map unit. */
 								    const uint64_t *values = miniflow_get_values(mf);
 								    const map_t *map = mf->map.bits;
 								    /* Skip every unit that precedes the one holding 'idx': rebase 'idx'
 								     * onto the next unit and step 'values' forward by count_1bits() of
 								     * the unit being skipped. */
 								    while (idx >= MAP_T_BITS) {
 								        idx -= MAP_T_BITS;
 								        values += count_1bits(*map++);
 								    }
 								    /* 'idx' is now relative to '*map'; miniflow_values_get__() computes
 								     * the offset of the value within this unit. */
 								    return miniflow_values_get__(values, *map, idx);
 								}
 								#define MINIFLOW_IN_MAP(MF, IDX) flowmap_is_set(&(MF)->map, IDX)
 								/* Get the value of the struct flow 'FIELD' as up to 8 byte wide integer type
 								 * 'TYPE' from miniflow 'MF'. */
 								#define MINIFLOW_GET_TYPE(MF, TYPE, FIELD)                              \
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
+								    (BUILD_ASSERT(sizeof(TYPE) == sizeof(((struct flow *)0)->FIELD)),   \
 								     BUILD_ASSERT_GCCONLY(__builtin_types_compatible_p(TYPE, typeof(((struct flow *)0)->FIELD))), \
 								     MINIFLOW_GET_TYPE__(MF, TYPE, FIELD))
 								/* Like MINIFLOW_GET_TYPE, but without checking that TYPE is the correct width
 								 * for FIELD.  (This is useful for deliberately reading adjacent fields in one
 								 * go.)  */
 								#define MINIFLOW_GET_TYPE__(MF, TYPE, FIELD)                            \
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    (MINIFLOW_IN_MAP(MF, FLOW_U64_OFFSET(FIELD))                        \
 								     ? ((OVS_FORCE const TYPE *)miniflow_get__(MF, FLOW_U64_OFFSET(FIELD))) \
 								     [FLOW_U64_OFFREM(FIELD) / sizeof(TYPE)]                            \
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								     : 0)
-												lib/flow: Simplify miniflow accessors, add ipv6 support.

Add new macro MINIFLOW_MAP(FIELD) that returns the map covering the
given struct flow field.

Change the miniflow accessors to macros so that they can take the
field name directly.

Use these to add ipv6 support to miniflow_hash_5tuple().

Add ipv6 support to flow_hash_5tuple() as well so that these two
functions continue to return the same hash value for the corresponding
flows.

Also, simplify miniflow_get_metadata().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												ofproto-dpif: Reject partial ct_labels if unsupported.

If only half of a ct_label is present in a miniflow/minimask (eg, only
matching on one specific bit), then rule_check() would allow the flow
even if ct_label was unsupported, because it required both 64-bit fields
that comprise the ct_label to be present in the miniflow before
performing the check.

Fix this by populating the stack copy of the label directly from the
miniflow fields if available (or zero each 64-bit word if unavailable).

Suggested-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2015-11-11 11:39:49 -08:00
+								#define MINIFLOW_GET_U128(FLOW, FIELD)                                  \
 								    (ovs_u128) { .u64 = {                                               \
 								            (MINIFLOW_IN_MAP(FLOW, FLOW_U64_OFFSET(FIELD)) ?            \
 								             *miniflow_get__(FLOW, FLOW_U64_OFFSET(FIELD)) : 0),        \
 								            (MINIFLOW_IN_MAP(FLOW, FLOW_U64_OFFSET(FIELD) + 1) ?        \
 								             *miniflow_get__(FLOW, FLOW_U64_OFFSET(FIELD) + 1) : 0) } }
-												Add connection tracking label support.

This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.

For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:

    table=0,priority=1,action=drop
    table=0,arp,action=normal
    table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
    table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
    table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-10-13 11:13:10 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								#define MINIFLOW_GET_U8(FLOW, FIELD)            \
 								    MINIFLOW_GET_TYPE(FLOW, uint8_t, FIELD)
 								/* MINIFLOW_GET_U8 above and each wrapper below forward to
 								 * MINIFLOW_GET_TYPE with a fixed TYPE; only that type argument differs.
 								 * MINIFLOW_GET_TYPE build-asserts that sizeof(TYPE) equals the size of
 								 * the struct flow FIELD, so using a wrapper of the wrong width for
 								 * FIELD fails at compile time. */
 								#define MINIFLOW_GET_U16(FLOW, FIELD)           \
 								    MINIFLOW_GET_TYPE(FLOW, uint16_t, FIELD)
 								#define MINIFLOW_GET_BE16(FLOW, FIELD)          \
 								    MINIFLOW_GET_TYPE(FLOW, ovs_be16, FIELD)
 								#define MINIFLOW_GET_U32(FLOW, FIELD)           \
 								    MINIFLOW_GET_TYPE(FLOW, uint32_t, FIELD)
 								#define MINIFLOW_GET_BE32(FLOW, FIELD)          \
 								    MINIFLOW_GET_TYPE(FLOW, ovs_be32, FIELD)
 								#define MINIFLOW_GET_U64(FLOW, FIELD)           \
 								    MINIFLOW_GET_TYPE(FLOW, uint64_t, FIELD)
 								#define MINIFLOW_GET_BE64(FLOW, FIELD)          \
 								    MINIFLOW_GET_TYPE(FLOW, ovs_be64, FIELD)
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
 								static inline uint64_t miniflow_get(const struct miniflow *,
 								                                    unsigned int u64_ofs);
 								static inline uint32_t miniflow_get_u32(const struct miniflow *,
 								                                        unsigned int u32_ofs);
 								static inline ovs_be32 miniflow_get_be32(const struct miniflow *,
 								                                         unsigned int be32_ofs);
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								static inline uint16_t miniflow_get_vid(const struct miniflow *, size_t);
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								static inline uint16_t miniflow_get_tcp_flags(const struct miniflow *);
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
-												flow, match, classifier: Add new functions for miniflow and minimatch.

The miniflow and minimatch APIs lack several of the features of the flow
and match APIs.  This commit adds a few of the missing functions.

These functions will be used for the first time in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 22:00:34 -07:00
+								static inline uint64_t miniflow_get_tun_metadata_present_map(
 								    const struct miniflow *);
 								static inline uint32_t miniflow_get_recirc_id(const struct miniflow *);
 								static inline uint32_t miniflow_get_dp_hash(const struct miniflow *);
 								static inline ovs_be32 miniflow_get_ports(const struct miniflow *);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
 								bool miniflow_equal_in_minimask(const struct miniflow *a,
 								                                const struct miniflow *b,
 								                                const struct minimask *);
 								bool miniflow_equal_flow_in_minimask(const struct miniflow *a,
 								                                     const struct flow *b,
 								                                     const struct minimask *);
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis);
-												lib/flow: Skip minimask value checks.

We allow zero 'values' in a miniflow for it to have the same map
as the corresponding minimask.  Minimasks themselves never have
zero data values, though.  Document this and optimize the code
accordingly.

v2:
- Made miniflow_get_map_in_range() to return data offset instead of
  a pointer via the last parameter.
- Simplified minimatch_hash_in_range() by removing pointer arithmetic.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-20 08:16:31 -08:00
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								/* Compressed flow wildcards. */
 								/* A sparse representation of a "struct flow_wildcards".
 								 *
-												lib/flow: Skip minimask value checks.

We allow zero 'values' in a miniflow for it to have the same map
as the corresponding minimask.  Minimasks themselves never have
zero data values, though.  Document this and optimize the code
accordingly.

v2:
- Made miniflow_get_map_in_range() to return data offset instead of
  a pointer via the last parameter.
- Simplified minimatch_hash_in_range() by removing pointer arithmetic.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-20 08:16:31 -08:00
+								 * See the large comment on struct miniflow for details.
 								 *
 								 * Note: While miniflow can have zero data for a 1-bit in the map,
 								 * a minimask may not!  We rely on this in the implementation. */
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								struct minimask {
 								    struct miniflow masks;    /* Mask data, held in sparse miniflow form. */
 								};
-												match: Single malloc minimatch.

Allocate the miniflow and minimask in struct minimatch at once, so
that they are consecutive in memory.  This halves the number of
allocations, and allows smaller minimatches to share the same cache
line.

After this a minimatch has one heap allocation for all its data.
Previously it had either none (when data was small enough to fit in
struct miniflow's inline buffer), or two (when the inline buffer was
insufficient).  Hopefully always having one performs almost the same
as none or two, on average.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								void minimask_init(struct minimask *, const struct flow_wildcards *);
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								struct minimask * minimask_create(const struct flow_wildcards *);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								void minimask_combine(struct minimask *dst,
 								                      const struct minimask *a, const struct minimask *b,
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								                      uint64_t storage[FLOW_U64S]);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								void minimask_expand(const struct minimask *, struct flow_wildcards *);
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								static inline uint32_t minimask_get_u32(const struct minimask *,
 								                                        unsigned int u32_ofs);
 								static inline ovs_be32 minimask_get_be32(const struct minimask *,
 								                                         unsigned int be32_ofs);
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								static inline uint16_t minimask_get_vid_mask(const struct minimask *, size_t);
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
 								bool minimask_equal(const struct minimask *a, const struct minimask *b);
 								bool minimask_has_extra(const struct minimask *, const struct minimask *);
-												lib/flow: Optimize minimask_has_extra() and minimask_is_catchall()

We only need to iterate over the bits masked by the 'b' in
minimask_has_extra(), since for zeroes in 'b' there can be no 'extra'
wildcards in 'a', as 'b' has already wildcarded all the bits.

minimask_is_catchall() can be simplified by the invariant that mask's
map never has 1-bits for all-zero values.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												lib/flow: Maintain miniflow offline values explicitly.

This allows use of miniflows that have all of their values inline.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
-												lib/flow: Optimize minimask_has_extra() and minimask_is_catchall()

We only need to iterate over the bits masked by the 'b' in
minimask_has_extra(), since for zeroes in 'b' there can be no 'extra'
wildcards in 'a', as 'b' has already wildcarded all the bits.

minimask_is_catchall() can be simplified by the invariant that mask's
map never has 1-bits for all-zero values.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								/* Returns true if 'mask' matches every packet, false if 'mask' fixes any bits
 								 * or fields. */
 								static inline bool
 								minimask_is_catchall(const struct minimask *mask)
 								{
 								    /* For every 1-bit in mask's map, the corresponding value is non-zero,
 								     * so the only way the mask can not fix any bits or fields is for the
 								     * map to be zero. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return flowmap_is_empty(mask->masks.map);
-												lib/flow: Optimize minimask_has_extra() and minimask_is_catchall()

We only need to iterate over the bits masked by the 'b' in
minimask_has_extra(), since for zeroes in 'b' there can be no 'extra'
wildcards in 'a', as 'b' has already wildcarded all the bits.

minimask_is_catchall() can be simplified by the invariant that mask's
map never has 1-bits for all-zero values.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								}
-												lib: Inline functions used in classifier_lookup.

This helps about 1% in TCP_CRR performance test.  However, this also
helps by clearly showing the classifier_lookup() cost in perf reports
as one item.

This also cleans up the flow/match APIs from functionality only used
by the classifier, making is more straightforward to evolve them
later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								/* Returns the uint64_t that would be at byte offset '8 * u64_ofs' if 'flow'
 								 * were expanded into a "struct flow". */
 								static inline uint64_t miniflow_get(const struct miniflow *flow,
 								                                    unsigned int u64_ofs)
 								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return MINIFLOW_IN_MAP(flow, u64_ofs) ? *miniflow_get__(flow, u64_ofs) : 0;
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								}
 								/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'flow'
 								 * were expanded into a "struct flow". */
 								static inline uint32_t miniflow_get_u32(const struct miniflow *flow,
 								                                        unsigned int u32_ofs)
 								{
 								    /* Two 32-bit slots share each 64-bit miniflow word: fetch the word,
 								     * then pick the half holding slot 'u32_ofs'.  Which half an odd slot
 								     * occupies depends on the host byte order. */
 								    const uint64_t word = miniflow_get(flow, u32_ofs >> 1);
 								    const unsigned int odd = u32_ofs & 1;
 								#if WORDS_BIGENDIAN
 								    const unsigned int shift = odd ? 0 : 32;
 								#else
 								    const unsigned int shift = odd ? 32 : 0;
 								#endif

 								    return (uint32_t) (word >> shift);
 								}
 								static inline ovs_be32 miniflow_get_be32(const struct miniflow *flow,
 								                                         unsigned int be32_ofs)
 								{
 								    return (OVS_FORCE ovs_be32)miniflow_get_u32(flow, be32_ofs);
 								}
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								/* Returns the VID within the vlan_tci member of the "struct flow" represented
 								 * by 'flow'. */
 								static inline uint16_t
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								miniflow_get_vid(const struct miniflow *flow, size_t n)
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								{
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								    if (n < FLOW_MAX_VLAN_HEADERS) {
 								        union flow_vlan_hdr hdr = {
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
+								            .qtag = MINIFLOW_GET_BE32(flow, vlans[n].qtag)
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								        };
 								        return vlan_tci_to_vid(hdr.tci);
 								    }
 								    return 0;
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'mask'
 								 * were expanded into a "struct flow_wildcards". */
 								static inline uint32_t
 								minimask_get_u32(const struct minimask *mask, unsigned int u32_ofs)
 								{
 								    /* The lookup is delegated to the miniflow accessor, applied to the
 								     * 'masks' member of 'mask' at the same 32-bit offset. */
 								    const struct miniflow *wrapped = &mask->masks;
 								    return miniflow_get_u32(wrapped, u32_ofs);
 								}
 								static inline ovs_be32
 								minimask_get_be32(const struct minimask *mask, unsigned int be32_ofs)
 								{
 								    return (OVS_FORCE ovs_be32)minimask_get_u32(mask, be32_ofs);
 								}
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								/* Returns the VID mask within the vlan_tci member of the "struct
 								 * flow_wildcards" represented by 'mask'. */
 								static inline uint16_t
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								minimask_get_vid_mask(const struct minimask *mask, size_t n)
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								{
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								    return miniflow_get_vid(&mask->masks, n);
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
 								/* Returns the value of the "tcp_flags" field in 'flow'. */
 								static inline uint16_t
 								miniflow_get_tcp_flags(const struct miniflow *flow)
 								{
-												lib/flow: Simplify miniflow accessors, add ipv6 support.

Add new macro MINIFLOW_MAP(FIELD) that returns the map covering the
given struct flow field.

Change the miniflow accessors to macros so that they can take the
field name directly.

Use these to add ipv6 support to miniflow_hash_5tuple().

Add ipv6 support to flow_hash_5tuple() as well so that these two
functions continue to return the same hash value for the corresponding
flows.

Also, simplify miniflow_get_metadata().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								    return ntohs(MINIFLOW_GET_BE16(flow, tcp_flags));
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */
 								static inline ovs_be64
 								miniflow_get_metadata(const struct miniflow *flow)
 								{
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								    return MINIFLOW_GET_BE64(flow, metadata);
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								}
-												flow, match, classifier: Add new functions for miniflow and minimatch.

The miniflow and minimatch APIs lack several of the features of the flow
and match APIs.  This commit adds a few of the missing functions.

These functions will be used for the first time in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 22:00:34 -07:00
+								/* Returns the bitmap that indicates which tunnel metadata fields are present
 								 * in 'flow'. */
 								static inline uint64_t
 								miniflow_get_tun_metadata_present_map(const struct miniflow *flow)
 								{
 								    /* Fetch the 'tunnel.metadata.present.map' member as a uint64_t
 								     * through the generic 64-bit miniflow accessor macro. */
 								    return MINIFLOW_GET_U64(flow, tunnel.metadata.present.map);
 								}
 								/* Returns the value of the "recirc_id" field in 'flow', read as a
 								 * uint32_t through the MINIFLOW_GET_U32 accessor. */
 								static inline uint32_t
 								miniflow_get_recirc_id(const struct miniflow *flow)
 								{
 								    return MINIFLOW_GET_U32(flow, recirc_id);
 								}
 								/* Returns the value of the "dp_hash" field in 'flow', read as a uint32_t
 								 * through the MINIFLOW_GET_U32 accessor. */
 								static inline uint32_t
 								miniflow_get_dp_hash(const struct miniflow *flow)
 								{
 								    return MINIFLOW_GET_U32(flow, dp_hash);
 								}
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
 								/* Returns the 'tp_src' and 'tp_dst' fields together as one piece of data.
 								 *
 								 * The value is a single ovs_be32 read starting at 'tp_src'.  The access
 								 * goes through MINIFLOW_GET_TYPE__ instead of a size-specific accessor;
 								 * presumably this is the unchecked variant, needed because the read is
 								 * deliberately wider than 'tp_src' itself -- confirm against the macro
 								 * definition. */
 								static inline ovs_be32
 								miniflow_get_ports(const struct miniflow *flow)
 								{
 								    return MINIFLOW_GET_TYPE__(flow, ovs_be32, tp_src);
 								}
-												flow, match, classifier: Add new functions for miniflow and minimatch.

The miniflow and minimatch APIs lack several of the features of the flow
and match APIs.  This commit adds a few of the missing functions.

These functions will be used for the first time in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 22:00:34 -07:00
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
 								 *
 								 * The return value is all-1-bits if 'mask' matches on the whole value of the
 								 * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field,
 								 * or some other value if the metadata field is partially matched, partially
 								 * wildcarded. */
 								static inline ovs_be64
 								minimask_get_metadata_mask(const struct minimask *mask)
 								{
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								    return MINIFLOW_GET_BE64(&mask->masks, metadata);
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								}
-												flow: Simplify many functions for working with flows and wildcards.

Now that "struct flow" and "struct flow_wildcards" have the same simple
and uniform structure, it's easy to handle common operations by just
iterating over the bits inside them.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 13:43:18 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Perform a bitwise OR of miniflow 'src' flow data specified in 'subset' with
 								 * the equivalent fields in 'dst', storing the result in 'dst'.  'subset' must
 								 * be a subset of 'src's map. */
-												lib: Inline functions used in classifier_lookup.

This helps about 1% in TCP_CRR performance test.  However, this also
helps by clearly showing the classifier_lookup() cost in perf reports
as one item.

This also cleans up the flow/match APIs from functionality only used
by the classifier, making it more straightforward to evolve them
later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								static inline void
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								flow_union_with_miniflow_subset(struct flow *dst, const struct miniflow *src,
 								                                struct flowmap subset)
-												lib: Inline functions used in classifier_lookup.

This helps about 1% in TCP_CRR performance test.  However, this also
helps by clearly showing the classifier_lookup() cost in perf reports
as one item.

This also cleans up the flow/match APIs from functionality only used
by the classifier, making it more straightforward to evolve them
later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								{
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								    uint64_t *dst_u64 = (uint64_t *) dst;
-												flow: Make compile with MSVC.

MSVC does not like zero sized arrays in structs.  Hence, remove the
'values' member from struct miniflow and add back the getters
miniflow_values() and miniflow_get_values().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-16 17:42:24 -07:00
+								    const uint64_t *p = miniflow_get_values(src);
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    map_t map;
-												lib: Inline functions used in classifier_lookup.

This helps about 1% in TCP_CRR performance test.  However, this also
helps by clearly showing the classifier_lookup() cost in perf reports
as one item.

This also cleans up the flow/match APIs from functionality only used
by the classifier, making it more straightforward to evolve them
later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    FLOWMAP_FOR_EACH_MAP (map, subset) {
 								        size_t idx;
 								        MAP_FOR_EACH_INDEX(idx, map) {
 								            dst_u64[idx] |= *p++;
 								        }
 								        dst_u64 += MAP_T_BITS;
-												lib: Inline functions used in classifier_lookup.

This helps about 1% in TCP_CRR performance test.  However, this also
helps by clearly showing the classifier_lookup() cost in perf reports
as one item.

This also cleans up the flow/match APIs from functionality only used
by the classifier, making it more straightforward to evolve them
later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								    }
 								}
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Perform a bitwise OR of miniflow 'src' flow data with the equivalent
 								 * fields in 'dst', storing the result in 'dst'.
 								 *
 								 * This is the whole-map form of flow_union_with_miniflow_subset(): the
 								 * subset handed down is 'src->map' itself, so nothing that 'src' lists in
 								 * its own map is left out. */
 								static inline void
 								flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
 								{
 								    flow_union_with_miniflow_subset(dst, src, src->map);
 								}
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
+								/* Reports whether the conntrack state carried by 'flow' denotes a valid
 								 * conntrack entry.
 								 *
 								 * When 'mask' is nonnull and 'wc' is NULL, 'flow' is checked as a match
 								 * together with 'mask'.  In every other case 'flow' is checked as a fully
 								 * extracted flow, and if 'wc' is nonnull the CS_VALID_MASK bits are set in
 								 * 'wc->masks.ct_state' before the result is computed. */
 								static inline bool
 								is_ct_valid(const struct flow *flow, const struct flow_wildcards *mask,
 								            struct flow_wildcards *wc)
 								{
 								/* In a fully extracted flow, valid CT state always has either the 'new',
 								 * 'established', or 'reply_dir' bit set. */
 								#define CS_VALID_MASK (CS_NEW | CS_ESTABLISHED | CS_REPLY_DIR)
 								    if (!mask || wc) {
 								        /* Not a pure match check: look at the extracted state, noting in
 								         * 'wc' (when given) which bits were consulted. */
 								        if (wc) {
 								            wc->masks.ct_state |= CS_VALID_MASK;
 								        }
 								        return (flow->ct_state & CS_VALID_MASK) != 0;
 								    }
 								    /* Match check ('mask' without 'wc').  Any one of the bits that implies
 								     * a valid conntrack entry is enough. */
 								    if (flow->ct_state & (CS_NEW | CS_ESTABLISHED | CS_RELATED
 								                          | CS_REPLY_DIR | CS_SRC_NAT | CS_DST_NAT)) {
 								        return true;
 								    }
 								    /* Otherwise require an explicit not-invalid: tracked, with the
 								     * 'invalid' bit covered by the mask and clear in the flow. */
 								    return (flow->ct_state & CS_TRACKED)
 								           && (mask->masks.ct_state & CS_INVALID)
 								           && !(flow->ct_state & CS_INVALID);
 								}
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								static inline void
 								pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
 								{
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
+								    /* Update this function whenever struct flow changes. */
-												userspace: Add GTP-U support.

GTP, GPRS Tunneling Protocol, is a group of IP-based communications
protocols used to carry general packet radio service (GPRS) within
GSM, UMTS and LTE networks.  GTP protocol has two parts: Signalling
(GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used
for setting up GTP-U protocol, which is an IP-in-UDP tunneling
protocol. Usually GTP is used in connecting between base station for
radio, Serving Gateway (S-GW), and PDN Gateway (P-GW).

This patch implements GTP-U protocol for userspace datapath,
supporting only required header fields and G-PDU message type.
See spec in:
https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784
Signed-off-by: Feng Yang <yangfengee04@gmail.com>
Co-authored-by: Feng Yang <yangfengee04@gmail.com>
Signed-off-by: Yi Yang <yangyi01@inspur.com>
Co-authored-by: Yi Yang <yangyi01@inspur.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-25 11:19:23 -08:00
+								    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    md->recirc_id = flow->recirc_id;
 								    md->dp_hash = flow->dp_hash;
-												ofproto-dpif-rid: Make lookups cheaper.

This patch removes a large-ish copy from the recirculation context
lookup, which is performed for each recirculated upcall and
revalidation of a recirculating flow.

Tunnel metadata has grown large since the addition of Geneve options,
and copying that metadata for performing a lookup is not necessary.
Change recirc_metadata to use a pointer to struct flow_tnl, and only
copy the tunnel metadata when needed, and only copy as little of it as
possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    flow_tnl_copy__(&md->tunnel, &flow->tunnel);
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    md->skb_priority = flow->skb_priority;
 								    md->pkt_mark = flow->pkt_mark;
 								    md->in_port = flow->in_port;
-												Add support for connection tracking.

This patch adds a new action and fields to OVS that allow connection
tracking to be performed. This support works in conjunction with the
Linux kernel support merged into the Linux-4.3 development cycle.

Packets have two possible states with respect to connection tracking:
Untracked packets have not previously passed through the connection
tracker, while tracked packets have previously been through the
connection tracker. For OpenFlow pipeline processing, untracked packets
can become tracked, and they will remain tracked until the end of the
pipeline. Tracked packets cannot become untracked.

Connections can be unknown, uncommitted, or committed. Packets which are
untracked have unknown connection state. To know the connection state,
the packet must become tracked. Uncommitted connections have no
connection state stored about them, so it is only possible for the
connection tracker to identify whether they are a new connection or
whether they are invalid. Committed connections have connection state
stored beyond the lifetime of the packet, which allows later packets in
the same connection to be identified as part of the same established
connection, or related to an existing connection - for instance ICMP
error responses.

The new 'ct' action transitions the packet from "untracked" to
"tracked" by sending this flow through the connection tracker.
The following parameters are supported initially:

- "commit": When commit is executed, the connection moves from
  uncommitted state to committed state. This signals that information
  about the connection should be stored beyond the lifetime of the
  packet within the pipeline. This allows future packets in the same
  connection to be recognized as part of the same "established" (est)
  connection, as well as identifying packets in the reply (rpl)
  direction, or packets related to an existing connection (rel).
- "zone=[u16|NXM]": Perform connection tracking in the zone specified.
  Each zone is an independent connection tracking context. When the
  "commit" parameter is used, the connection will only be committed in
  the specified zone, and not in other zones. This is 0 by default.
- "table=NUMBER": Fork pipeline processing in two. The original instance
  of the packet will continue processing the current actions list as an
  untracked packet. An additional instance of the packet will be sent to
  the connection tracker, which will be re-injected into the OpenFlow
  pipeline to resume processing in the specified table, with the
  ct_state and other ct match fields set. If the table is not specified,
  then the packet is submitted to the connection tracker, but the
  pipeline does not fork and the ct match fields are not populated. It
  is strongly recommended to specify a table later than the current
  table to prevent loops.

When the "table" option is used, the packet that continues processing in
the specified table will have the ct_state populated. The ct_state may
have any of the following flags set:

- Tracked (trk): Connection tracking has occurred.
- Reply (rpl): The flow is in the reply direction.
- Invalid (inv): The connection tracker couldn't identify the connection.
- New (new): This is the beginning of a new connection.
- Established (est): This is part of an already existing connection.
- Related (rel): This connection is related to an existing connection.

For more information, consult the ovs-ofctl(8) man pages.

Below is a simple example flow table to allow outbound TCP traffic from
port 1 and drop traffic from port 2 that was not initiated by port 1:

    table=0,priority=1,action=drop
    table=0,arp,action=normal
    table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2
    table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1)
    table=1,in_port=2,ct_state=+trk+est,tcp,action=1
    table=1,in_port=2,ct_state=+trk+new,tcp,action=drop

Based on original design by Justin Pettit, contributions from Thomas
Graf and Daniele Di Proietto.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-08-11 10:56:09 -07:00
+								    md->ct_state = flow->ct_state;
 								    md->ct_zone = flow->ct_zone;
-												Add connection tracking mark support.

This patch adds a new 32-bit metadata field to the connection tracking
interface. When a mark is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_mark" field in the flow.

For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a mark with those
connections:

    table=0,priority=1,action=drop
    table=0,arp,action=normal
    table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_mark)),2
    table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
    table=1,in_port=2,ct_state=+trk,ct_mark=1,tcp,action=1

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-09-18 13:58:00 -07:00
+								    md->ct_mark = flow->ct_mark;
-												Add connection tracking label support.

This patch adds a new 128-bit metadata field to the connection tracking
interface. When a label is specified as part of the ct action and the
connection is committed, the value is saved with the current connection.
Subsequent ct lookups with the table specified will expose this metadata
as the "ct_label" field in the flow.

For example, to allow new TCP connections from port 1->2 and only allow
established connections from port 2->1, and to associate a label with
those connections:

    table=0,priority=1,action=drop
    table=0,arp,action=normal
    table=0,in_port=1,tcp,action=ct(commit,exec(set_field:1->ct_label)),2
    table=0,in_port=2,ct_state=-trk,tcp,action=ct(table=1)
    table=1,in_port=2,ct_state=+trk,ct_label=1,tcp,action=1

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-10-13 11:13:10 -07:00
+								    md->ct_label = flow->ct_label;
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
 								    md->ct_orig_tuple_ipv6 = false;
-												Check flow's dl_type before setting ct_orig_tuple in 'pkt_metadata_from_flow()'

Normally flow's dl_type will be a valid value. However when a packet is sent to
the controller, dl_type is not stored in the 'ofputil_packet_in_private'. When
the controller resumes (OFPRAW_NXT_RESUME) the packet, the flow's dl_type will be
0. If the flow's ct_state has valid value, then the 'pkt_metadata_from_flow'
neither sets the ct_orig_tuple from the flow nor resets it. This results in invalid
value ct_orig_tuple in the pkt_metadata.

This patch handles this situation by checking the dl_type before setting the
ct_orig_tuple. If dl_type is 0, it resets it. It also resets ct_orig_tuple if
dl_type is non zero and other than IPv4 or IPv6.

Reported-by: Daniel Alvarez Sanchez <dalvarez@redhat.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-October/339868.html
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-25 23:33:03 +05:30
+								    if (flow->dl_type && is_ct_valid(flow, NULL, NULL)) {
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
+								        if (flow->dl_type == htons(ETH_TYPE_IP)) {
 								            md->ct_orig_tuple.ipv4 = (struct ovs_key_ct_tuple_ipv4) {
 								                flow->ct_nw_src,
 								                flow->ct_nw_dst,
 								                flow->ct_tp_src,
 								                flow->ct_tp_dst,
 								                flow->ct_nw_proto,
 								            };
 								        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
 								            md->ct_orig_tuple_ipv6 = true;
 								            md->ct_orig_tuple.ipv6 = (struct ovs_key_ct_tuple_ipv6) {
 								                flow->ct_ipv6_src,
 								                flow->ct_ipv6_dst,
 								                flow->ct_tp_src,
 								                flow->ct_tp_dst,
 								                flow->ct_nw_proto,
 								            };
-												Check flow's dl_type before setting ct_orig_tuple in 'pkt_metadata_from_flow()'

Normally flow's dl_type will be a valid value. However when a packet is sent to
the controller, dl_type is not stored in the 'ofputil_packet_in_private'. When
the controller resumes (OFPRAW_NXT_RESUME) the packet, the flow's dl_type will be
0. If the flow's ct_state has valid value, then the 'pkt_metadata_from_flow'
neither sets the ct_orig_tuple from the flow nor resets it. This results in invalid
value ct_orig_tuple in the pkt_metadata.

This patch handles this situation by checking the dl_type before setting the
ct_orig_tuple. If dl_type is 0, it resets it. It also resets ct_orig_tuple if
dl_type is non zero and other than IPv4 or IPv6.

Reported-by: Daniel Alvarez Sanchez <dalvarez@redhat.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-October/339868.html
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-25 23:33:03 +05:30
+								        } else {
 								            /* Reset ct_orig_tuple for other types. */
 								            memset(&md->ct_orig_tuple, 0, sizeof md->ct_orig_tuple);
-												odp: Support conntrack orig tuple key.

Userspace support for datapath original direction conntrack tuple.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
											
										
										
											2017-03-08 17:18:23 -08:00
+								        }
 								    } else {
 								        memset(&md->ct_orig_tuple, 0, sizeof md->ct_orig_tuple);
 								    }
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												tnl-neigh-cache: Unwildcard flow members before inspecting them.

tnl_neigh_snoop() is part of the translation.  During translation we
have to unwildcard all the fields we examine to make a decision.

tnl_arp_snoop() and tnl_nd_snoop() failed to unwildcard fields in case
of failure.  The solution is to do unwildcarding before the field is
inspected.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-05-24 17:13:29 -07:00
+								/* Often, during translation we need to read a value from a flow ('FLOW') and
 								 * unwildcard the corresponding bits in the wildcards ('WC').  This macro
 								 * makes it easier to do that.
 								 *
 								 * The expansion is a comma expression whose value is '(FLOW)->FIELD'.  'WC'
 								 * may be NULL, in which case WC_MASK_FIELD() is not applied.  'WC' appears
 								 * more than once in the expansion, so pass an expression without side
 								 * effects. */
 								#define FLOW_WC_GET_AND_MASK_WC(FLOW, WC, FIELD) \
 								    (((WC) ? WC_MASK_FIELD(WC, FIELD) : NULL), ((FLOW)->FIELD))
-												userspace: Add packet_type in dp_packet and flow

This commit adds a packet_type attribute to the structs dp_packet and flow
to explicitly carry the type of the packet as preparation for the
introduction of the so-called packet type-aware pipeline (PTAP) in OVS.

The packet_type is a big-endian 32 bit integer with the encoding as
specified in OpenFlow version 1.5.

The upper 16 bits contain the packet type name space. Pre-defined values
are defined in openflow-common.h:

enum ofp_header_type_namespaces {
    OFPHTN_ONF = 0,             /* ONF namespace. */
    OFPHTN_ETHERTYPE = 1,       /* ns_type is an Ethertype. */
    OFPHTN_IP_PROTO = 2,        /* ns_type is a IP protocol number. */
    OFPHTN_UDP_TCP_PORT = 3,    /* ns_type is a TCP or UDP port. */
    OFPHTN_IPV4_OPTION = 4,     /* ns_type is an IPv4 option number. */
};

The lower 16 bits specify the actual type in the context of the name space.

Only name spaces 0 and 1 will be supported for now.

For name space OFPHTN_ONF the relevant packet type is 0 (Ethernet).
This is the default packet_type in OVS and the only one supported so far.
Packets of type (OFPHTN_ONF, 0) are called Ethernet packets.

In name space OFPHTN_ETHERTYPE the type is the Ethertype of the packet.
A packet of type (OFPHTN_ETHERTYPE, <Ethertype>) is a standard L2 packet
with the Ethernet header (and any VLAN tags) removed to expose the L3
(or L2.5) payload of the packet. These will simply be called L3 packets.

The Ethernet address fields dl_src and dl_dst in struct flow are not
applicable for an L3 packet and must be zero. However, to maintain
compatibility with the large code base, we have chosen to copy the
Ethertype of an L3 packet into the dl_type field of struct flow.

This does not mean that it will be possible to match on dl_type for L3
packets with PTAP later on. Matching must be done on packet_type instead.

New dp_packets are initialized with packet_type Ethernet. Ports that
receive L3 packets will have to explicitly adjust the packet_type.

Signed-off-by: Jean Tourrilhes <jt@labs.hpe.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-25 16:29:59 +00:00
+								/* Returns true if the packet type of 'flow' is PT_ETH.  When 'wc' is
 								 * nonnull, WC_MASK_FIELD() is applied to its packet_type field before the
 								 * comparison is made. */
 								static inline bool
 								is_ethernet(const struct flow *flow, struct flow_wildcards *wc)
 								{
 								    if (wc != NULL) {
 								        WC_MASK_FIELD(wc, packet_type);
 								    }
 								    return htonl(PT_ETH) == flow->packet_type;
 								}
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								/* Returns the Ethertype that applies to 'flow':
 								 *
 								 *   - if packet_type is PT_ETH, the flow's own dl_type;
 								 *   - if packet_type is in namespace OFPHTN_ETHERTYPE, the type taken from
 								 *     packet_type itself via pt_ns_type_be();
 								 *   - otherwise FLOW_DL_TYPE_NONE, converted with htons(). */
 								static inline ovs_be16
 								get_dl_type(const struct flow *flow)
 								{
 								    if (flow->packet_type == htonl(PT_ETH)) {
 								        return flow->dl_type;
 								    }
 								    if (pt_ns(flow->packet_type) == OFPHTN_ETHERTYPE) {
 								        return pt_ns_type_be(flow->packet_type);
 								    }
 								    return htons(FLOW_DL_TYPE_NONE);
 								}
-												meta-flow: Clean up masking with prerequisities checking.

Change mf_are_prereqs_ok() to take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								static inline bool is_vlan(const struct flow *flow,
 								                           struct flow_wildcards *wc)
 								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    if (!is_ethernet(flow, wc)) {
 								        return false;
 								    }
-												meta-flow: Clean up masking with prerequisities checking.

Change mf_are_prereqs_ok() to take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								    if (wc) {
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								        WC_MASK_FIELD_MASK(wc, vlans[0].tci, htons(VLAN_CFI));
-												meta-flow: Clean up masking with prerequisities checking.

Change mf_are_prereqs_ok() to take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								    }
-												Add support for 802.1ad (QinQ tunneling)

Flow key handling changes:
 - Add VLAN header array in struct flow, to record multiple 802.1q VLAN
   headers.
 - Add dpif multi-VLAN capability probing. If datapath supports
   multi-VLAN, increase the maximum depth of nested OVS_KEY_ATTR_ENCAP.

Refactor VLAN handling in dpif-xlate:
 - Introduce 'xvlan' to track VLAN stack during flow processing.
 - Input and output VLAN translation according to the xbundle type.

Push VLAN action support:
 - Allow ethertype 0x88a8 in VLAN headers and push_vlan action.
 - Support push_vlan on dot1q packets.

Use other_config:vlan-limit in table Open_vSwitch to limit maximum VLANs
that can be matched. This allows us to preserve backwards compatibility.

Add test cases for VLAN depth limit, Multi-VLAN actions and QinQ VLAN
handling

Co-authored-by: Thomas F Herbert <thomasfherbert@gmail.com>
Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Co-authored-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Eric Garver <e@erig.me>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-01 17:47:59 -05:00
+								    return (flow->vlans[0].tci & htons(VLAN_CFI)) != 0;
-												meta-flow: Clean up masking with prerequisities checking.

Change mf_are_prereqs_ok() to take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								}
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								static inline bool is_ip_any(const struct flow *flow)
 								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    return dl_type_is_ip_any(get_dl_type(flow));
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												meta-flow: Clean up masking with prerequisites checking.

Change mf_are_prereqs_ok() take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								/* Returns true if 'flow' passes is_ip_any() and flow->nw_proto equals
 								 * 'ip_proto'; returns false otherwise.
 								 *
 								 * If 'wc' is nonnull and the is_ip_any() check passes, applies
 								 * WC_MASK_FIELD to nw_proto in 'wc' to account for the test. */
 								static inline bool is_ip_proto(const struct flow *flow, uint8_t ip_proto,
 								                               struct flow_wildcards *wc)
 								{
 								    if (!is_ip_any(flow)) {
 								        /* nw_proto is never examined on this path, so 'wc' is untouched. */
 								        return false;
 								    }
 								    if (wc) {
 								        WC_MASK_FIELD(wc, nw_proto);
 								    }
 								    return flow->nw_proto == ip_proto;
 								}
 								/* Returns the result of is_ip_proto() for IPPROTO_TCP; 'wc' is passed
 								 * through unchanged and may be null. */
 								static inline bool is_tcp(const struct flow *flow, struct flow_wildcards *wc)
 								{
 								    const bool tcp = is_ip_proto(flow, IPPROTO_TCP, wc);
 								    return tcp;
 								}
 								/* Returns the result of is_ip_proto() for IPPROTO_UDP; 'wc' is passed
 								 * through unchanged and may be null. */
 								static inline bool is_udp(const struct flow *flow, struct flow_wildcards *wc)
 								{
 								    const bool udp = is_ip_proto(flow, IPPROTO_UDP, wc);
 								    return udp;
 								}
 								/* Returns the result of is_ip_proto() for IPPROTO_SCTP; 'wc' is passed
 								 * through unchanged and may be null. */
 								static inline bool is_sctp(const struct flow *flow, struct flow_wildcards *wc)
 								{
 								    const bool sctp = is_ip_proto(flow, IPPROTO_SCTP, wc);
 								    return sctp;
 								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_icmpv4(const struct flow *flow,
 								                             struct flow_wildcards *wc)
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    if (get_dl_type(flow) == htons(ETH_TYPE_IP)) {
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								        if (wc) {
 								            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
 								        }
 								        return flow->nw_proto == IPPROTO_ICMP;
 								    }
 								    return false;
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_icmpv6(const struct flow *flow,
 								                             struct flow_wildcards *wc)
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    if (get_dl_type(flow) == htons(ETH_TYPE_IPV6)) {
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								        if (wc) {
 								            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
 								        }
 								        return flow->nw_proto == IPPROTO_ICMPV6;
 								    }
 								    return false;
-												lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().

Add inlined generic accessors for miniflow integer type fields, and a
new miniflow_get_tcp_flags() using these.  These will be used in a
later patch.

Some definitions also used in lib/packets.h had to be moved there to
resolve circular include dependencies.  Similarly, some inline
functions using struct flow are now in lib/flow.h.  IMO this is
cleaner, since now the lib/flow.h need not be included from
lib/packets.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:56 -07:00
+								}
-												flow: New function is_nd().

This simplifies a few pieces of code and will acquire another user in an
upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-02 11:35:29 -07:00
+								/* Returns true if 'flow' passes is_icmpv6(), flow->tp_dst is zero, and
 								 * flow->tp_src equals ND_NEIGHBOR_SOLICIT or ND_NEIGHBOR_ADVERT (compared
 								 * in network byte order); returns false otherwise.
 								 *
 								 * If 'wc' is nonnull, it is handed to is_icmpv6() and then the masks of
 								 * the fields examined here are set to all-ones: tp_dst whenever the
 								 * is_icmpv6() check passes, tp_src only if tp_dst turned out to be zero. */
 								static inline bool is_nd(const struct flow *flow,
 								                         struct flow_wildcards *wc)
 								{
 								    if (!is_icmpv6(flow, wc)) {
 								        return false;
 								    }
 								    /* tp_dst is tested first, so it is unwildcarded first. */
 								    if (wc) {
 								        memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
 								    }
 								    if (flow->tp_dst != htons(0)) {
 								        /* tp_src was not consulted; leave its mask alone. */
 								        return false;
 								    }
 								    if (wc) {
 								        memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
 								    }
 								    switch (ntohs(flow->tp_src)) {
 								    case ND_NEIGHBOR_SOLICIT:
 								    case ND_NEIGHBOR_ADVERT:
 								        return true;
 								    default:
 								        return false;
 								    }
 								}
-												Handle gratuitous ARP requests and replies in tnl_arp_snoop()

Problem:
========
In user-space tunneling implementation, tnl_arp_snoop() snoops only ARP
*reply* packets to resolve tunnel nexthop IP addresses to MAC addresses.
Normally the ARP requests are periodically sent by the local host IP stack,
so that the ARP cache in OVS is refreshed and entries do not time out.
However, if the remote tunnel nexthop is a VRRP IP, and the gateway
periodically sends gratuitous ARP *requests* to announce itself,
tnl_arp_snoop() treats them as INVALID. Consequently, the ARP cache in OVS
expires after 10 minutes, which results in dropping of the next packet(s)
until a new ARP request is responded to.

Fix:
====
Enhance the tunnel neighbor resolution logic in OVS to not only snoop on
ARP replies but also on gratuitous ARP requests.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
From: Manohar K C <manohar.krishnappa.chidambaraswamy@ericsson.com>
CC: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-05 12:20:27 +00:00
+								/* Returns true if flow->dl_type equals ETH_TYPE_ARP (compared in network
 								 * byte order).  Takes no wildcards argument and reads flow->dl_type
 								 * directly. */
 								static inline bool is_arp(const struct flow *flow)
 								{
 								    const bool arp = flow->dl_type == htons(ETH_TYPE_ARP);
 								    return arp;
 								}
 								/* Returns true if 'flow' passes is_arp() and its nw_src and nw_dst, each
 								 * obtained through FLOW_WC_GET_AND_MASK_WC, compare equal; returns false
 								 * otherwise.
 								 *
 								 * 'wc' is only handed to FLOW_WC_GET_AND_MASK_WC, and only when the
 								 * is_arp() check passes.  NOTE(review): that macro presumably unwildcards
 								 * the field when 'wc' is nonnull -- confirm against its definition. */
 								static inline bool is_garp(const struct flow *flow,
 								                           struct flow_wildcards *wc)
 								{
 								    if (!is_arp(flow)) {
 								        return false;
 								    }
 								    return (FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src) ==
 								            FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_dst));
 								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_igmp(const struct flow *flow, struct flow_wildcards *wc)
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    if (get_dl_type(flow) == htons(ETH_TYPE_IP)) {
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								        if (wc) {
 								            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
 								        }
 								        return flow->nw_proto == IPPROTO_IGMP;
 								    }
 								    return false;
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_mld(const struct flow *flow,
 								                          struct flow_wildcards *wc)
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								{
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								    if (is_icmpv6(flow, wc)) {
 								        if (wc) {
 								            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
 								        }
 								        return (flow->tp_src == htons(MLD_QUERY)
 								                || flow->tp_src == htons(MLD_REPORT)
 								                || flow->tp_src == htons(MLD_DONE)
 								                || flow->tp_src == htons(MLD2_REPORT));
 								    }
 								    return false;
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_mld_query(const struct flow *flow,
 								                                struct flow_wildcards *wc)
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								{
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								    if (is_icmpv6(flow, wc)) {
 								        if (wc) {
 								            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
 								        }
 								        return flow->tp_src == htons(MLD_QUERY);
 								    }
 								    return false;
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								}
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								static inline bool is_mld_report(const struct flow *flow,
 								                                 struct flow_wildcards *wc)
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								{
-												ofproto-dpif-xlate: Fix IGMP megaflow matching.

IGMP translation wasn't setting enough bits in the wildcards to ensure
different packets were handled differently.

Reported-by: "O'Reilly, Darragh" <darragh.oreilly@hpe.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2016-April/021036.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-08 10:34:10 -07:00
+								    return is_mld(flow, wc) && !is_mld_query(flow, wc);
-												mcast-snooping: Add Multicast Listener Discovery support

Add support for MLDv1 and MLDv2. The behavior is not that different from
IGMP. Packets to all-hosts address and queries are always flooded,
reports go to routers, routers are added when a query is observed, and
all MLD packets go through slow path.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Ben Pfaff <blp@nicira.com>
[blp@nicira.com moved an assignment out of an 'if' statement]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-01 16:12:12 -03:00
+								}
-												ofproto-dpif-xlate: Identify STP BPDUs more specifically.

Apart from STP, EVB extension of LLDP as well as IEEE 802.1QBG use the
Nearest Customer Bridge (NCB) DMAC which has a value of 0180.c200.0000.
STP can be distinguished by Ethertype from these protocols.

Signed-off-by: Padmanabhan Krishnan <kprad1@yahoo.com>
[blp@nicira.com rewrote the details of the patch]
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Padmanabhan Krishnan <kprad1@yahoo.com>

											
										
										
											2014-04-24 13:18:18 -07:00
+								static inline bool is_stp(const struct flow *flow)
 								{
-												userspace: Add OXM field MFF_PACKET_TYPE

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at.

If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-23 16:47:57 +00:00
+								    return (flow->dl_type == htons(FLOW_DL_TYPE_NONE)
 								            && eth_addr_equals(flow->dl_dst, eth_addr_stp));
-												ofproto-dpif-xlate: Identify STP BPDUs more specifically.

Apart from STP, EVB extension of LLDP as well as IEEE 802.1QBG use the
Nearest Customer Bridge (NCB) DMAC which has a value of 0180.c200.0000.
STP can be distinguished by Ethertype from these protocols.

Signed-off-by: Padmanabhan Krishnan <kprad1@yahoo.com>
[blp@nicira.com rewrote the details of the patch]
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Padmanabhan Krishnan <kprad1@yahoo.com>

											
										
										
											2014-04-24 13:18:18 -07:00
+								}
-												bfd: Make the tp_dst masking megaflow-friendly.

When there are tunnel ports with BFD enabled, all UDP flows will have
dst port as match condition in datapath, which causes unnecessarily
high flow miss for all UDP traffic, and results in latency increase.

This patch solves the problem by masking tp_dst only for a single
bit that is enough to tell the mismatch when it is not BFD traffic.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-September/047360.html
Signed-off-by: Han Zhou <hzhou8@ebay.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-10-03 15:11:20 -07:00
+								/* Compares flow->tp_dst, converted to host byte order, against 'port' and
 								 * returns true when the two are equal.
 								 *
 								 * If 'wc' is nonnull, wc->masks.tp_dst is updated to cover the bits the
 								 * answer depends on: all 16 bits when the ports are equal, otherwise only
 								 * the most significant bit in which they differ (OR-ed into whatever the
 								 * mask already holds).
 								 *
 								 * The caller must already have ensured that 'flow' is a protocol for which
 								 * tp_dst is relevant. */
 								static inline bool tp_dst_equals(const struct flow *flow, uint16_t port,
 								                                 struct flow_wildcards *wc)
 								{
 								    const uint16_t mismatch = port ^ ntohs(flow->tp_dst);
 								    if (wc && !mismatch) {
 								        /* Equality depends on every bit, so match all of them. */
 								        wc->masks.tp_dst = OVS_BE16_MAX;
 								    } else if (wc) {
 								        /* One differing bit is enough to tell the ports apart.  Shifting
 								         * the 16-bit difference into the top of a 64-bit word makes the
 								         * leading-zero count the bit's offset from the MSB, in [0,15]. */
 								        int msb_ofs = raw_clz64((uint64_t) mismatch << 48);
 								        wc->masks.tp_dst |= htons(0x8000 >> msb_ofs);
 								    }
 								    return mismatch == 0;
 								}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#endif /* flow.h */