ovs/lib/classifier.c

/*
 * Copyright (c) 2009-2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "classifier.h"
#include "classifier-private.h"
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "byte-order.h"
#include "openvswitch/dynamic-string.h"
#include "odp-util.h"
#include "packets.h"
#include "util.h"

struct trie_ctx;

/* A collection of "struct cls_conjunction"s currently embedded into a
 * cls_match.
 *
 * The structure ends in the flexible array 'conj'; the number of bytes needed
 * for a given number of conjunctions is computed by
 * cls_conjunction_set_size(). */
struct cls_conjunction_set {
    /* Link back to the cls_match.
     *
     * cls_conjunction_set is mostly used during classifier lookup, and, in
     * turn, during classifier lookup the most used member of
     * cls_conjunction_set is the rule's priority, so we cache it here for fast
     * access. */
    struct cls_match *match;
    int priority;               /* Cached copy of match->priority. */

    /* Conjunction information.
     *
     * 'min_n_clauses' allows some optimization during classifier lookup:
     * find_conjunctive_match() gives up immediately when its clause limit is
     * below 'min_n_clauses'. */
    unsigned int n;             /* Number of elements in 'conj'. */
    unsigned int min_n_clauses; /* Smallest 'n_clauses' among the elements of
                                 * 'conj'. */
    struct cls_conjunction conj[];
};

/* Ports trie depends on both ports sharing the same ovs_be32.
 *
 * TP_PORTS_OFS32 is the index of the 32-bit word of struct flow that holds
 * 'tp_src'.  The build-time assertions check that 'tp_dst' lies in the same
 * 32-bit word and that the word index is even, so that TP_PORTS_OFS64, the
 * index of the enclosing 64-bit unit, is an exact division. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)

/* Returns the number of bytes occupied by a cls_conjunction_set whose
 * trailing 'conj' array holds 'n' elements. */
static size_t
cls_conjunction_set_size(size_t n)
{
    const size_t header_bytes = sizeof(struct cls_conjunction_set);
    const size_t array_bytes = n * sizeof(struct cls_conjunction);

    return header_bytes + array_bytes;
}

/* Builds a newly allocated cls_conjunction_set for 'match' holding a copy of
 * the 'n' conjunctions in 'conj', with 'priority' cached from 'match' and
 * 'min_n_clauses' precomputed.
 *
 * Returns NULL, without allocating anything, when 'n' is zero. */
static struct cls_conjunction_set *
cls_conjunction_set_alloc(struct cls_match *match,
                          const struct cls_conjunction conj[], size_t n)
{
    if (!n) {
        return NULL;
    }

    /* Smallest clause count over all of the conjunctions. */
    size_t fewest_clauses = conj[0].n_clauses;
    for (size_t k = 1; k < n; k++) {
        fewest_clauses = MIN(fewest_clauses, conj[k].n_clauses);
    }

    struct cls_conjunction_set *result
        = xmalloc(cls_conjunction_set_size(n));

    result->match = match;
    result->priority = match->priority;
    result->n = n;
    result->min_n_clauses = fewest_clauses;
    memcpy(result->conj, conj, n * sizeof *conj);

    return result;
}

/* Allocates and returns a cls_match for 'rule', carrying a copy of the rule's
 * flow values, the rule's priority, and a conjunction set built from the 'n'
 * elements of 'conj' (no set at all if 'n' is zero).
 *
 * Both the add and the remove version are set to 'version'. */
static struct cls_match *
cls_match_alloc(const struct cls_rule *rule, ovs_version_t version,
                const struct cls_conjunction conj[], size_t n)
{
    const size_t n_values = miniflow_n_values(rule->match.flow);
    struct cls_match *m = xmalloc(sizeof *m + MINIFLOW_VALUES_SIZE(n_values));
    struct cls_conjunction_set *set;

    ovsrcu_init(&m->next, NULL);

    /* 'cls_rule' and 'priority' are written through CONST_CAST. */
    *CONST_CAST(const struct cls_rule **, &m->cls_rule) = rule;
    *CONST_CAST(int *, &m->priority) = rule->priority;

    /* Make rule initially invisible. */
    m->versions = VERSIONS_INITIALIZER(version, version);

    miniflow_clone(CONST_CAST(struct miniflow *, &m->flow),
                   rule->match.flow, n_values);

    /* The set caches m->priority, which has been filled in above. */
    set = cls_conjunction_set_alloc(m, conj, n);
    ovsrcu_set_hidden(&m->conj_set, set);

    return m;
}

/* Forward declarations of the static helpers for subtable management and rule
 * lookup that are used before their definitions. */
static struct cls_subtable *find_subtable(const struct classifier *cls,
                                          const struct minimask *);
static struct cls_subtable *insert_subtable(struct classifier *cls,
                                            const struct minimask *);
static void destroy_subtable(struct classifier *cls, struct cls_subtable *);

static const struct cls_match *find_match_wc(const struct cls_subtable *,
                                             ovs_version_t version,
                                             const struct flow *,
                                             struct trie_ctx *,
                                             unsigned int n_tries,
                                             struct flow_wildcards *);
static struct cls_match *find_equal(const struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);

/* Walks the list of rules that follow 'rule' and returns the first one that
 * is visible in 'version', or NULL if there is none.  'rule' itself is never
 * returned.
 *
 * Multiple identical rules with the same priority may exist transitionally,
 * but when versioning is used at most one of them is ever visible for lookups
 * on any given 'version'. */
static inline const struct cls_match *
next_visible_rule_in_list(const struct cls_match *rule, ovs_version_t version)
{
    for (rule = cls_match_next(rule); rule; rule = cls_match_next(rule)) {
        if (cls_match_visible_in_version(rule, version)) {
            break;
        }
    }

    return rule;
}

/* Type with maximum supported prefix length.
 *
 * The 'ipv6' member exists only to make the union as large as an IPv6
 * address; the data is accessed through 'be32'. */
union trie_prefix {
    struct in6_addr ipv6;  /* For sizing. */
    ovs_be32 be32;         /* For access. */
};

/* Forward declarations of the static prefix-trie and mask helpers that are
 * used before their definitions. */
static unsigned int minimask_get_prefix_len(const struct minimask *,
                                            const struct mf_field *);
static void trie_init(struct classifier *cls, int trie_idx,
                      const struct mf_field *);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
                                union trie_prefix *plens);
static unsigned int trie_lookup_value(const rcu_trie_ptr *,
                                      const ovs_be32 value[], ovs_be32 plens[],
                                      unsigned int value_bits);
static void trie_destroy(rcu_trie_ptr *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
                                 unsigned int n_bits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
                                 uint8_t be32ofs, unsigned int n_bits);

/* cls_rule. */

/* Initializes the parts of 'rule' that do not depend on its match: an empty
 * list node, the given 'priority', and no associated cls_match (that is, the
 * rule is not in any classifier).  The caller initializes 'rule->match'.
 *
 * 'priority' is a signed int, like the 'priority' member it is stored into
 * and like the argument passed by every caller, so that negative priorities
 * do not go through an implementation-defined unsigned-to-signed
 * conversion. */
static inline void
cls_rule_init__(struct cls_rule *rule, int priority)
{
    rculist_init(&rule->node);
    /* 'priority' is written through CONST_CAST. */
    *CONST_CAST(int *, &rule->priority) = priority;
    ovsrcu_init(&rule->cls_match, NULL);
}

/* Sets up 'rule' so that it matches the packets described by 'match' at
 * 'priority'.  'match' must satisfy the invariant described in the comment at
 * the definition of struct match.
 *
 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 *
 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 * 0 and UINT16_MAX, inclusive.) */
void
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority)
{
    struct minimatch *rule_match
        = CONST_CAST(struct minimatch *, &rule->match);

    cls_rule_init__(rule, priority);
    minimatch_init(rule_match, match);
}

/* Like cls_rule_init(), but the match is given as a "struct minimatch", which
 * is cloned into 'rule'. */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match, int priority)
{
    struct minimatch *rule_match
        = CONST_CAST(struct minimatch *, &rule->match);

    cls_rule_init__(rule, priority);
    minimatch_clone(rule_match, match);
}

/* Makes 'dst' a copy of 'src': same priority and a clone of the match.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    struct minimatch *dst_match = CONST_CAST(struct minimatch *, &dst->match);

    cls_rule_init__(dst, src->priority);
    minimatch_clone(dst_match, &src->match);
}

/* Transfers the contents of 'src' into 'dst', destroying 'src'.
 *
 * 'src' must be a cls_rule NOT in a classifier.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    struct minimatch *to = CONST_CAST(struct minimatch *, &dst->match);
    struct minimatch *from = CONST_CAST(struct minimatch *, &src->match);

    cls_rule_init__(dst, src->priority);
    minimatch_move(to, from);
}

/* Releases the memory that 'rule' refers to.  'rule' itself is not freed (it
 * is normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    bool unlinked;

    /* Must not be in a classifier. */
    ovs_assert(!get_cls_match_protected(rule));

    /* Check that the rule has been properly removed from the classifier:
     * its list node is either already poisoned or empty.  The emptiness test
     * is only evaluated when the node is not poisoned. */
    unlinked = (rule->node.prev == RCULIST_POISON
                || rculist_is_empty(&rule->node));
    ovs_assert(unlinked);
    rculist_poison__(&rule->node);   /* Poisons also the next pointer. */

    minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match));
}

/* Replaces the conjunction set of 'cr' by the 'n' conjunctions in 'conj'.
 * Nothing is done when the current set already has the same contents.  An
 * old set, if any, is freed via ovsrcu_postpone().
 *
 * This may only be called by the exclusive writer. */
void
cls_rule_set_conjunctions(struct cls_rule *cr,
                          const struct cls_conjunction *conj, size_t n)
{
    struct cls_match *match = get_cls_match_protected(cr);
    struct cls_conjunction_set *prev_set
        = ovsrcu_get_protected(struct cls_conjunction_set *, &match->conj_set);
    struct cls_conjunction *prev_conj = NULL;
    unsigned int prev_n = 0;

    if (prev_set) {
        prev_conj = prev_set->conj;
        prev_n = prev_set->n;
    }

    /* Unchanged: same count and, for a non-empty set, identical bytes. */
    if (prev_n == n && (!n || !memcmp(prev_conj, conj, n * sizeof *conj))) {
        return;
    }

    if (prev_set) {
        ovsrcu_postpone(free, prev_set);
    }
    ovsrcu_set(&match->conj_set, cls_conjunction_set_alloc(match, conj, n));
}


/* Compares 'a' and 'b'.  Returns true if both have the same priority and
 * equal matches (that is, they match the same packets at the same priority),
 * false if they differ in some way. */
bool
cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
{
    if (a->priority != b->priority) {
        return false;
    }
    return minimatch_equal(&a->match, &b->match);
}

/* Appends to 's' a string describing the match and priority of 'rule'.
 * 'tun_table' and 'port_map' are passed through to minimatch_format(). */
void
cls_rule_format(const struct cls_rule *rule, const struct tun_table *tun_table,
                const struct ofputil_port_map *port_map, struct ds *s)
{
    const int priority = rule->priority;

    minimatch_format(&rule->match, tun_table, port_map, s, priority);
}

/* Reports whether the mask of 'rule' is a catch-all, i.e. whether 'rule'
 * matches every packet. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    const struct minimask *mask = rule->match.mask;

    return minimask_is_catchall(mask);
}

/* Hides 'rule' from lookups starting at 'remove_version', which must not be
 * earlier than the version in which the rule was added.  Once that version is
 * used in lookups, the caller should remove 'rule' via ovsrcu_postpone().
 *
 * 'rule' must be in a classifier.
 * This may only be called by the exclusive writer. */
void
cls_rule_make_invisible_in_version(const struct cls_rule *rule,
                                   ovs_version_t remove_version)
{
    struct cls_match *m = get_cls_match_protected(rule);

    ovs_assert(remove_version >= m->versions.add_version);
    cls_match_set_remove_version(m, remove_version);
}

/* Reverts cls_rule_make_invisible_in_version() by resetting the remove
 * version of 'rule' to OVS_VERSION_NOT_REMOVED.
 *
 * 'rule' must be in a classifier.
 * This may only be called by the exclusive writer. */
void
cls_rule_restore_visibility(const struct cls_rule *rule)
{
    struct cls_match *m = get_cls_match_protected(rule);

    cls_match_set_remove_version(m, OVS_VERSION_NOT_REMOVED);
}

/* Returns true if 'rule' currently has a cls_match and that match is visible
 * in 'version', false otherwise.
 *
 * 'rule' must be in a classifier. */
bool
cls_rule_visible_in_version(const struct cls_rule *rule, ovs_version_t version)
{
    struct cls_match *m = get_cls_match(rule);

    if (!m) {
        return false;
    }
    return cls_match_visible_in_version(m, version);
}

/* Sets up 'cls' as a classifier that initially contains no classification
 * rules.
 *
 * If 'flow_segments' is nonnull, its leading elements are copied as the
 * segment boundaries; copying stops at the first element that is not less
 * than FLOW_U64S or after CLS_MAX_INDICES elements, whichever comes first. */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);

    cls->n_flow_segments = 0;
    if (flow_segments) {
        const uint8_t *seg;

        for (seg = flow_segments;
             cls->n_flow_segments < CLS_MAX_INDICES && *seg < FLOW_U64S;
             seg++) {
            cls->flow_segments[cls->n_flow_segments++] = *seg;
        }
    }

    /* With 'n_tries' at zero, trie_init() only resets each trie slot. */
    cls->n_tries = 0;
    for (int idx = 0; idx < CLS_MAX_TRIES; idx++) {
        trie_init(cls, idx, NULL);
    }

    cls->publish = true;
}

/* Tears down 'cls': its tries, its subtables, and the containers holding
 * them.  A null 'cls' is a no-op.  Rules within 'cls', if any, are not freed;
 * this is the caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
{
    struct cls_subtable *subtable;

    if (!cls) {
        return;
    }

    for (int idx = 0; idx < cls->n_tries; idx++) {
        trie_destroy(&cls->tries[idx].root);
    }

    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        destroy_subtable(cls, subtable);
    }
    cmap_destroy(&cls->subtables_map);

    pvector_destroy(&cls->subtables);
}

/* Set the fields for which prefix lookup should be performed.
 *
 * 'trie_fields' holds 'n_fields' field IDs.  Fields that the trie code cannot
 * handle (negative 'flow_be32ofs', or a bit length that is not a multiple of
 * 32) and duplicate fields are skipped, and at most CLS_MAX_TRIES fields are
 * used.
 *
 * Returns true if the trie configuration of 'cls' was changed, false if the
 * requested configuration equals the current one. */
bool
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    /* new_fields[i] is the field to install in trie slot 'i', or NULL when
     * that slot already uses the requested field. */
    const struct mf_field * new_fields[CLS_MAX_TRIES];
    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
    int i, n_tries = 0;
    bool changed = false;

    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (bitmap_is_set(fields.bm, trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        bitmap_set1(fields.bm, trie_fields[i]);

        new_fields[n_tries] = NULL;
        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
            new_fields[n_tries] = field;
            changed = true;
        }
        n_tries++;
    }

    /* A change is needed if some slot gets a different field or if the number
     * of tries shrinks. */
    if (changed || n_tries < cls->n_tries) {
        struct cls_subtable *subtable;

        /* Trie configuration needs to change.  Disable trie lookups
         * for the tries that are changing and wait all the current readers
         * with the old configuration to be done. */
        changed = false;    /* From here on: "some 'trie_plen' was cleared". */
        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            for (i = 0; i < cls->n_tries; i++) {
                if ((i < n_tries && new_fields[i]) || i >= n_tries) {
                    if (subtable->trie_plen[i]) {
                        subtable->trie_plen[i] = 0;
                        changed = true;
                    }
                }
            }
        }
        /* Synchronize if any readers were using tries.  The readers may
         * temporarily function without the trie lookup based optimizations. */
        if (changed) {
            /* ovsrcu_synchronize() functions as a memory barrier, so it does
             * not matter that subtable->trie_plen is not atomic. */
            ovsrcu_synchronize();
        }

        /* Now set up the tries. */
        for (i = 0; i < n_tries; i++) {
            if (new_fields[i]) {
                trie_init(cls, i, new_fields[i]);
            }
        }
        /* Destroy the rest, if any.  'i' continues from 'n_tries'. */
        for (; i < cls->n_tries; i++) {
            trie_init(cls, i, NULL);
        }

        cls->n_tries = n_tries;
        return true;
    }

    return false; /* No change. */
}

/* (Re)initializes trie number 'trie_idx' of 'cls' for 'field', which may be
 * NULL to leave the trie without a field.
 *
 * A trie slot below 'cls->n_tries' has its old contents destroyed; a slot at
 * or above it just gets a null root.  Afterwards every subtable that has a
 * nonzero prefix length on 'field' gets its rules inserted into the trie and
 * its 'trie_plen[trie_idx]' updated. */
static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *st;

    if (trie_idx >= cls->n_tries) {
        ovsrcu_set_hidden(&trie->root, NULL);
    } else {
        trie_destroy(&trie->root);
    }
    trie->field = field;

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (st, cmap_node, &cls->subtables_map) {
        unsigned int plen = 0;

        if (field) {
            plen = minimask_get_prefix_len(&st->mask, field);
        }

        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &st->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }

        /* Initialize subtable's prefix length on this field.  This will
         * allow readers to use the trie, so it is stored only after the
         * release fence that follows the insertions. */
        atomic_thread_fence(memory_order_release);
        st->trie_plen[trie_idx] = plen;
    }
}

/* Reports whether 'cls' is empty, judged by its subtable map being empty:
 * true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking. */
bool
classifier_is_empty(const struct classifier *cls)
{
    const struct cmap *subtables = &cls->subtables_map;

    return cmap_is_empty(subtables);
}

/* Returns the rule count maintained in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    /* n_rules is an int, so in the presence of concurrent writers this will
     * return either the old or a new value. */
    int count = cls->n_rules;

    return count;
}

/* Returns the ports word of 'match', i.e. the ports of the match's flow
 * ANDed with the ports of the match's mask. */
static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
    /* Could optimize to use the same map if needed for fast path. */
    ovs_be32 flow_ports = miniflow_get_ports(match->flow);
    ovs_be32 mask_ports = miniflow_get_ports(&match->mask->masks);

    return flow_ports & mask_ports;
}

/* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
 * the caller must not modify or free it.
 *
 * 'conjs' and 'n_conjs' give the conjunctions to attach to the rule; they are
 * copied.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority) that is visible in 'version', replaces the old
 * rule by 'rule' and returns the rule that was replaced.  The caller takes
 * ownership of the returned rule and is thus responsible for destroying it
 * with cls_rule_destroy(), after RCU grace period has passed (see
 * ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 */
const struct cls_rule *
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
                   ovs_version_t version,
                   const struct cls_conjunction *conjs, size_t n_conjs)
{
    struct cls_match *new;
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    struct cls_match *head;
    unsigned int mask_offset;
    size_t n_rules = 0;     /* Stays 0 unless a new cmap node is inserted. */
    uint32_t basis;
    uint32_t hash;
    unsigned int i;

    /* 'new' is initially invisible to lookups. */
    new = cls_match_alloc(rule, version, conjs, n_conjs);
    ovsrcu_set(&CONST_CAST(struct cls_rule *, rule)->cls_match, new);

    /* Find the subtable for the rule's mask, creating it if necessary. */
    subtable = find_subtable(cls, rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, rule->match.mask);
    }

    /* Compute hashes in segments. */
    basis = 0;
    mask_offset = 0;
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                        &mask_offset, &basis);
    }
    /* Here 'i' equals 'n_indices', so this hashes the range after the last
     * index, continuing from the accumulated 'basis'. */
    hash = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                &mask_offset, &basis);

    head = find_equal(subtable, rule->match.flow, hash);
    if (!head) {
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Add rule to ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add new node to segment indices. */
        for (i = 0; i < subtable->n_indices; i++) {
            ccmap_inc(&subtable->indices[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else {   /* Equal rules exist in the classifier already. */
        struct cls_match *prev, *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority.  Insert after rules marked invisible
         * in any version of the same priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule->priority > iter->priority
                || (rule->priority == iter->priority
                    && !cls_match_is_eventually_invisible(iter))) {
                break;
            }
        }

        /* Replace 'iter' with 'new' or insert 'new' between 'prev' and
         * 'iter'. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {
                cls_match_replace(prev, iter, new);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {
                cls_match_insert(prev, iter, new);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the new
             * head. */
            if (iter == head) {
                cmap_replace(&subtable->rules, &head->cmap_node,
                             &new->cmap_node, hash);
            }

            if (old) {
                struct cls_conjunction_set *conj_set;

                /* Free the displaced match and its conjunction set via
                 * ovsrcu_postpone(). */
                conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                                &iter->conj_set);
                if (conj_set) {
                    ovsrcu_postpone(free, conj_set);
                }

                ovsrcu_set(&old->cls_match, NULL); /* Marks old rule as removed
                                                    * from the classifier. */
                ovsrcu_postpone(cls_match_free_cb, iter);

                /* No change in subtable's max priority or max count. */

                /* Make 'new' visible to lookups in the appropriate version. */
                cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

                /* Make rule visible to iterators (immediately). */
                rculist_replace(CONST_CAST(struct rculist *, &rule->node),
                                &old->node);

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce.
                 *
                 * This path returns before 'cls->n_rules' is incremented and
                 * without touching 'cls->subtables'. */
                return old;
            }
        } else {
            /* 'new' is new node after 'prev' */
            cls_match_insert(prev, iter, new);
        }
    }

    /* Make 'new' visible to lookups in the appropriate version. */
    cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

    /* Make rule visible to iterators (immediately). */
    rculist_push_back(&subtable->rules_list,
                      CONST_CAST(struct rculist *, &rule->node));

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        /* First rule in the subtable: add the subtable to the vector. */
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    return NULL;
}

/* Adds 'rule' to 'cls' in 'version' with the 'n_conj' conjunctions in 'conj',
 * by way of classifier_replace(), and asserts that no rule was displaced.
 * Until 'rule' is removed from 'cls', the caller must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, const struct cls_rule *rule,
                  ovs_version_t version, const struct cls_conjunction conj[],
                  size_t n_conj)
{
    const struct cls_rule *replaced;

    replaced = classifier_replace(cls, rule, version, conj, n_conj);
    ovs_assert(!replaced);
}

/* If 'cls_rule' is in 'cls', removes 'cls_rule' from 'cls' and returns true.
 * It is the caller's responsibility to destroy 'cls_rule' with
 * cls_rule_destroy(), freeing the memory block in which 'cls_rule' resides,
 * etc., as necessary.
 *
 * If 'cls_rule' is not in any classifier, returns false without making any
 * changes.
 *
 * 'cls_rule' must not be in some classifier other than 'cls'.
 */
bool
classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
{
    struct cls_match *rule, *prev, *next, *head;
    struct cls_conjunction_set *conj_set;
    struct cls_subtable *subtable;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    unsigned int mask_offset;
    size_t n_rules;
    unsigned int i;

    /* A rule without a cls_match is not in any classifier. */
    rule = get_cls_match_protected(cls_rule);
    if (!rule) {
        return false;
    }
    /* Mark as removed. */
    ovsrcu_set(&CONST_CAST(struct cls_rule *, cls_rule)->cls_match, NULL);

    /* Remove 'cls_rule' from the subtable's rules list. */
    rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));

    subtable = find_subtable(cls, cls_rule->match.mask);
    ovs_assert(subtable);

    /* Compute the segment hashes and the full hash, as in
     * classifier_replace(). */
    mask_offset = 0;
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&cls_rule->match,
                                        subtable->index_maps[i],
                                        &mask_offset, &basis);
    }
    /* Here 'i' equals 'n_indices'. */
    hash = minimatch_hash_range(&cls_rule->match, subtable->index_maps[i],
                                &mask_offset, &basis);

    head = find_equal(subtable, cls_rule->match.flow, hash);

    /* Check if the rule is not the head rule. */
    if (rule != head) {
        struct cls_match *iter;

        /* Not the head rule, but potentially one with the same priority. */
        /* Remove from the list of equal rules. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule == iter) {
                break;
            }
        }
        ovs_assert(iter == rule);

        cls_match_remove(prev, rule);

        goto check_priority;
    }

    /* 'rule' is the head rule.  Check if there is another rule to
     * replace 'rule' in the data structures. */
    next = cls_match_next_protected(rule);
    if (next) {
        cmap_replace(&subtable->rules, &rule->cmap_node, &next->cmap_node,
                     hash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        ccmap_dec(&subtable->indices[i], ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);

    if (n_rules == 0) {
        /* That was the last rule of the subtable. */
        destroy_subtable(cls, subtable);
    } else {
        /* The two 'goto's above jump directly into this branch. */
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'.  'head' is reused
             * as the iteration variable. */
            int max_priority = INT_MIN;
            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    /* free the rule.  Both the conjunction set and the cls_match itself are
     * freed via ovsrcu_postpone(). */
    conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                    &rule->conj_set);
    if (conj_set) {
        ovsrcu_postpone(free, conj_set);
    }
    ovsrcu_postpone(cls_match_free_cb, rule);
    cls->n_rules--;

    return true;
}

/* Removes 'cls_rule' from 'cls' with classifier_remove() and asserts that the
 * rule was actually removed. */
void
classifier_remove_assert(struct classifier *cls,
                         const struct cls_rule *cls_rule)
{
    bool removed = classifier_remove(cls, cls_rule);

    ovs_assert(removed);
}

/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have a prefix match on the trie field, but whose prefix
 * length is not indicated in 'match_plens'.  For example, a subtable that
 * has a 8-bit trie field prefix match can be skipped if
 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 * must be unwildcarded to make datapath flow only match packets it should.
 *
 * trie_ctx_init() fills in 'trie', 'be32ofs' and 'lookup_done' only; it does
 * not set 'maskbits' or 'match_plens'. */
struct trie_ctx {
    const struct cls_trie *trie;    /* Trie that this context refers to. */
    bool lookup_done;        /* Status of the lookup. */
    uint8_t be32ofs;         /* U32 offset of the field in question. */
    unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
    union trie_prefix match_plens;  /* Bitmask of prefix lengths with possible
                                     * matches. */
};

/* Prepares 'ctx' for lookups in 'trie': records the trie and the 32-bit
 * offset of its field, and marks the lookup as not yet done.  'maskbits' and
 * 'match_plens' are left untouched.  'trie' must have a field. */
static void
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
{
    ctx->lookup_done = false;
    ctx->trie = trie;
    ctx->be32ofs = trie->field->flow_be32ofs;
}

/* Progress of one conjunction during a lookup, kept in an hmap whose nodes
 * are hashed on the conjunction ID (see find_conjunctive_match()). */
struct conjunctive_match {
    struct hmap_node hmap_node; /* Node in the hmap of matches. */
    uint32_t id;                /* Conjunction ID. */
    uint64_t clauses;           /* Bitmap of clauses.  Starts as UINT64_MAX
                                 * shifted left by (n_clauses & 63); bit
                                 * 'clause' is set as each clause is seen.
                                 * UINT64_MAX means the conjunction matched. */
};

/* Searches the 'hash' bucket of 'matches' for the entry whose ID is 'id'.
 * Returns that entry, or NULL if there is none. */
static struct conjunctive_match *
find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
{
    struct conjunctive_match *candidate;

    HMAP_FOR_EACH_IN_BUCKET (candidate, hmap_node, hash, matches) {
        if (candidate->id != id) {
            continue;
        }
        return candidate;
    }

    return NULL;
}

/* Records in 'matches' the clauses contributed by the conjunctions in 'set',
 * ignoring conjunctions that have more than 'max_n_clauses' clauses.
 *
 * Entries for 'matches' are taken from the 'n_cm_stubs' elements of
 * 'cm_stubs' while the number of entries in 'matches' is below 'n_cm_stubs',
 * and from xmalloc() afterwards.
 *
 * Returns true, storing the conjunction's ID in '*idp', as soon as some
 * conjunction has all of its clauses recorded; otherwise returns false. */
static bool
find_conjunctive_match(const struct cls_conjunction_set *set,
                       unsigned int max_n_clauses, struct hmap *matches,
                       struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
                       uint32_t *idp)
{
    /* Not even the smallest conjunction in 'set' fits the limit. */
    if (set->min_n_clauses > max_n_clauses) {
        return false;
    }

    for (unsigned int i = 0; i < set->n; i++) {
        const struct cls_conjunction *c = &set->conj[i];
        struct conjunctive_match *cm;
        uint32_t hash;

        if (c->n_clauses > max_n_clauses) {
            continue;
        }

        hash = hash_int(c->id, 0);
        cm = find_conjunctive_match__(matches, c->id, hash);
        if (!cm) {
            size_t n_used = hmap_count(matches);

            if (n_used < n_cm_stubs) {
                cm = &cm_stubs[n_used];
            } else {
                cm = xmalloc(sizeof *cm);
            }
            hmap_insert(matches, &cm->hmap_node, hash);
            cm->id = c->id;
            /* Pre-set the bits at and above 'n_clauses', so that all-ones
             * means that every clause has been seen. */
            cm->clauses = UINT64_MAX << (c->n_clauses & 63);
        }

        cm->clauses |= UINT64_C(1) << c->clause;
        if (cm->clauses == UINT64_MAX) {
            *idp = cm->id;
            return true;
        }
    }

    return false;
}

/* Releases everything held by 'matches'.  Records that live inside the
 * caller's 'cm_stubs' array (of 'n_cm_stubs' elements) are left alone; any
 * other record is passed to free().  The hmap itself is then destroyed. */
static void
free_conjunctive_matches(struct hmap *matches,
                         struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
{
    /* Records only spill out of the stub array once the map holds more
     * records than there are stubs. */
    const bool spilled = hmap_count(matches) > n_cm_stubs;

    if (spilled) {
        struct conjunctive_match *record, *next_record;

        HMAP_FOR_EACH_SAFE (record, next_record, hmap_node, matches) {
            const bool is_stub = (record >= cm_stubs
                                  && record < &cm_stubs[n_cm_stubs]);

            if (!is_stub) {
                free(record);
            }
        }
    }
    hmap_destroy(matches);
}

/* Like classifier_lookup(), except that support for conjunctive matches can be
 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 * externally because turning off conjunctive matches is only useful to avoid
 * recursion within this function itself.
 *
 * The lookup has two phases.  The main loop visits subtables and records the
 * best "hard" (non-conjunctive) match along with every "soft" match (a match
 * that carries a conjunction set).  If some soft match outranks the hard
 * match, the soft match loop then checks, from the highest soft priority
 * downward, whether the soft matches complete a conjunction.  When one does,
 * this function calls itself with 'flow->conj_id' set to that conjunction's
 * ID and with conjunctive matches disabled.
 *
 * Returns the rule found, or a null pointer if nothing matches.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, ovs_version_t version,
                    struct flow *flow, struct flow_wildcards *wc,
                    bool allow_conjunctive_matches)
{
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    const struct cls_match *match;
    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
    const struct cls_match *hard = NULL;
    int hard_pri = INT_MIN;     /* hard ? hard->priority : INT_MIN. */

    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
     * these are (components of) conjunctive flows, we can only know whether
     * the full conjunctive flow matches after seeing multiple of them.  Thus,
     * we refer to these as "soft matches". */
    struct cls_conjunction_set *soft_stub[64];
    struct cls_conjunction_set **soft = soft_stub;
    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
    int soft_pri = INT_MIN;    /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */

    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
     * when table configuration changes, which happens typically only on
     * startup. */
    atomic_thread_fence(memory_order_acquire);

    /* Initialize trie contexts for find_match_wc(). */
    for (int i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Main loop. */
    struct cls_subtable *subtable;
    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri + 1, 2, sizeof *subtable,
                               &cls->subtables) {
        struct cls_conjunction_set *conj_set;

        /* Skip subtables with no match, or where the match is lower-priority
         * than some certain match we've already found. */
        match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
                              wc);
        if (!match || match->priority <= hard_pri) {
            continue;
        }

        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
        if (!conj_set) {
            /* 'match' isn't part of a conjunctive match.  It's the best
             * certain match we've got so far, since we know that it's
             * higher-priority than hard_pri.
             *
             * (There might be a higher-priority conjunctive match.  We can't
             * tell yet.) */
            hard = match;
            hard_pri = hard->priority;
        } else if (allow_conjunctive_matches) {
            /* 'match' is part of a conjunctive match.  Add it to the list. */
            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
                /* Out of room: move to a heap array of twice the size.  The
                 * initial array is 'soft_stub', which must not be freed. */
                struct cls_conjunction_set **old_soft = soft;

                allocated_soft *= 2;
                soft = xmalloc(allocated_soft * sizeof *soft);
                memcpy(soft, old_soft, n_soft * sizeof *soft);
                if (old_soft != soft_stub) {
                    free(old_soft);
                }
            }
            soft[n_soft++] = conj_set;

            /* Keep track of the highest-priority soft match. */
            if (soft_pri < match->priority) {
                soft_pri = match->priority;
            }
        }
    }

    /* In the common case, at this point we have no soft matches and we can
     * return immediately.  (We do the same thing if we have potential soft
     * matches but none of them are higher-priority than our hard match.) */
    if (hard_pri >= soft_pri) {
        if (soft != soft_stub) {
            free(soft);
        }
        return hard ? hard->cls_rule : NULL;
    }

    /* At this point, we have some soft matches.  We might also have a hard
     * match; if so, its priority is lower than the highest-priority soft
     * match. */

    /* Soft match loop.
     *
     * Check whether soft matches are real matches. */
    for (;;) {
        /* Delete soft matches that are null.  This only happens in second and
         * subsequent iterations of the soft match loop, when we drop back from
         * a high-priority soft match to a lower-priority one.
         *
         * Also, delete soft matches whose priority is less than or equal to
         * the hard match's priority.  In the first iteration of the soft
         * match, these can be in 'soft' because the earlier main loop found
         * the soft match before the hard match.  In second and later iteration
         * of the soft match loop, these can be in 'soft' because we dropped
         * back from a high-priority soft match to a lower-priority soft match.
         *
         * It is tempting to delete soft matches that cannot be satisfied
         * because there are fewer soft matches than required to satisfy any of
         * their conjunctions, but we cannot do that because there might be
         * lower priority soft or hard matches with otherwise identical
         * matches.  (We could special case those here, but there's no
         * need--we'll do so at the bottom of the soft match loop anyway and
         * this duplicates less code.)
         *
         * It's also tempting to break out of the soft match loop if 'n_soft ==
         * 1' but that would also miss lower-priority hard matches.  We could
         * special case that also but again there's no need. */
        for (int i = 0; i < n_soft; ) {
            if (!soft[i] || soft[i]->priority <= hard_pri) {
                /* Delete by overwriting with the last element; 'i' is not
                 * advanced so that the moved element is examined too. */
                soft[i] = soft[--n_soft];
            } else {
                i++;
            }
        }
        if (!n_soft) {
            break;
        }

        /* Find the highest priority among the soft matches.  (We know this
         * must be higher than the hard match's priority; otherwise we would
         * have deleted all of the soft matches in the previous loop.)  Count
         * the number of soft matches that have that priority. */
        soft_pri = INT_MIN;
        int n_soft_pri = 0;
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority > soft_pri) {
                soft_pri = soft[i]->priority;
                n_soft_pri = 1;
            } else if (soft[i]->priority == soft_pri) {
                n_soft_pri++;
            }
        }
        ovs_assert(soft_pri > hard_pri);

        /* Look for a real match among the highest-priority soft matches.
         *
         * It's unusual to have many conjunctive matches, so we use stubs to
         * avoid calling malloc() in the common case.  An hmap has a built-in
         * stub for up to 2 hmap_nodes; possibly, we would benefit a variant
         * with a bigger stub. */
        struct conjunctive_match cm_stubs[16];
        struct hmap matches;

        hmap_init(&matches);
        for (int i = 0; i < n_soft; i++) {
            uint32_t id;

            if (soft[i]->priority == soft_pri
                && find_conjunctive_match(soft[i], n_soft_pri, &matches,
                                          cm_stubs, ARRAY_SIZE(cm_stubs),
                                          &id)) {
                /* Conjunction 'id' is complete.  Look for a rule that
                 * matches with 'conj_id' set to it.  Conjunctive matches are
                 * disabled in the nested call, so it never collects soft
                 * matches and cannot recurse again. */
                uint32_t saved_conj_id = flow->conj_id;
                const struct cls_rule *rule;

                flow->conj_id = id;
                rule = classifier_lookup__(cls, version, flow, wc, false);
                flow->conj_id = saved_conj_id;

                if (rule) {
                    free_conjunctive_matches(&matches,
                                             cm_stubs, ARRAY_SIZE(cm_stubs));
                    if (soft != soft_stub) {
                        free(soft);
                    }
                    return rule;
                }
            }
        }
        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));

        /* There's no real match among the highest-priority soft matches.
         * However, if any of those soft matches has a lower-priority but
         * otherwise identical flow match, then we need to consider those for
         * soft or hard matches.
         *
         * The next iteration of the soft match loop will delete any null
         * pointers we put into 'soft' (and some others too). */
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority != soft_pri) {
                continue;
            }

            /* Find next-lower-priority flow with identical flow match. */
            match = next_visible_rule_in_list(soft[i]->match, version);
            if (match) {
                soft[i] = ovsrcu_get(struct cls_conjunction_set *,
                                     &match->conj_set);
                if (!soft[i]) {
                    /* The flow is a hard match; don't treat as a soft
                     * match. */
                    if (match->priority > hard_pri) {
                        hard = match;
                        hard_pri = hard->priority;
                    }
                }
            } else {
                /* No such lower-priority flow (probably the common case). */
                soft[i] = NULL;
            }
        }
    }

    /* All soft matches have been exhausted; fall back to the hard match, if
     * there is one. */
    if (soft != soft_stub) {
        free(soft);
    }
    return hard ? hard->cls_rule : NULL;
}

/* Looks up 'flow' in 'cls' and returns the highest-priority matching rule that
 * is visible in 'version', or a null pointer when no rule matches.  When
 * several matching rules share the highest priority, one of them is returned
 * arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, the bits that were significant in
 * the lookup are bitwise-OR'd into 'wc'.  The caller must have initialized
 * 'wc' at some earlier point (e.g., with flow_wildcards_init_catchall()).
 *
 * 'flow' is non-const only so that the lookup can modify it temporarily; it
 * is restored before this function returns. */
const struct cls_rule *
classifier_lookup(const struct classifier *cls, ovs_version_t version,
                  struct flow *flow, struct flow_wildcards *wc)
{
    /* External callers always get conjunctive match support. */
    const bool allow_conjunctive_matches = true;

    return classifier_lookup__(cls, version, flow, wc,
                               allow_conjunctive_matches);
}

/* Looks for the rule in 'cls' whose priority and matching criteria are both
 * identical to those of 'target' and that is visible in 'version'.  At most
 * one such rule can exist.  Returns it, or a null pointer if 'cls' has no
 * exact match. */
const struct cls_rule *
classifier_find_rule_exactly(const struct classifier *cls,
                             const struct cls_rule *target,
                             ovs_version_t version)
{
    /* Rules with the same mask all live in the same subtable. */
    const struct cls_subtable *subtable
        = find_subtable(cls, target->match.mask);
    if (!subtable) {
        return NULL;
    }

    const uint32_t hash = miniflow_hash_in_minimask(target->match.flow,
                                                    target->match.mask, 0);
    const struct cls_match *head = find_equal(subtable, target->match.flow,
                                              hash);
    if (!head) {
        return NULL;
    }

    const struct cls_match *candidate;
    CLS_MATCH_FOR_EACH (candidate, head) {
        if (candidate->priority > target->priority) {
            /* Not there yet. */
            continue;
        }
        if (candidate->priority < target->priority) {
            /* Went past the wanted priority: not found. */
            break;
        }
        if (cls_match_visible_in_version(candidate, version)) {
            return candidate->cls_rule;
        }
    }
    return NULL;
}

/* Looks for the rule in 'cls' that has priority 'priority' and exactly the
 * matching criteria of 'target', and that is visible in 'version'.  Returns
 * it, or a null pointer if 'cls' has no such rule visible in 'version'.
 *
 * A temporary cls_rule is built from 'target' for the duration of the
 * search. */
const struct cls_rule *
classifier_find_match_exactly(const struct classifier *cls,
                              const struct match *target, int priority,
                              ovs_version_t version)
{
    struct cls_rule probe;

    cls_rule_init(&probe, target, priority);
    const struct cls_rule *found = classifier_find_rule_exactly(cls, &probe,
                                                                version);
    cls_rule_destroy(&probe);

    return found;
}

/* Minimatch flavor of classifier_find_match_exactly(): looks for the rule in
 * 'cls' that has priority 'priority' and exactly the matching criteria of
 * 'target', and that is visible in 'version'.  Returns it, or a null pointer
 * if 'cls' has no such rule visible in 'version'.
 *
 * A temporary cls_rule is built from 'target' for the duration of the
 * search. */
const struct cls_rule *
classifier_find_minimatch_exactly(const struct classifier *cls,
                              const struct minimatch *target, int priority,
                              ovs_version_t version)
{
    struct cls_rule probe;

    cls_rule_init_from_minimatch(&probe, target, priority);
    const struct cls_rule *found = classifier_find_rule_exactly(cls, &probe,
                                                                version);
    cls_rule_destroy(&probe);

    return found;
}

/* Returns true if 'target' would overlap some rule in 'cls' in 'version',
 * false otherwise.  Two rules overlap when they have the same priority, a
 * packet could match both of them, and both are visible in the same version.
 *
 * Rules that match on disjoint sets of fields are the simplest example: if
 * one rule matches only on port number and another only on dl_type, then a
 * packet from that port with that dl_type matches both, so the rules overlap
 * whenever their priorities are equal. */
bool
classifier_rule_overlaps(const struct classifier *cls,
                         const struct cls_rule *target, ovs_version_t version)
{
    const int priority = target->priority;
    struct cls_subtable *subtable;

    /* Subtables are visited in descending order of max priority. */
    PVECTOR_FOR_EACH_PRIORITY (subtable, priority, 2, sizeof *subtable,
                               &cls->subtables) {
        /* 'storage' must directly follow 'mask'. */
        struct {
            struct minimask mask;
            uint64_t storage[FLOW_U64S];
        } combined;
        const struct cls_rule *other;

        minimask_combine(&combined.mask, target->match.mask, &subtable->mask,
                         combined.storage);

        RCULIST_FOR_EACH (other, node, &subtable->rules_list) {
            if (other->priority != priority) {
                continue;
            }
            if (!miniflow_equal_in_minimask(target->match.flow,
                                            other->match.flow,
                                            &combined.mask)) {
                continue;
            }
            if (cls_rule_visible_in_version(other, version)) {
                return true;
            }
        }
    }
    return false;
}

/* Returns true if 'rule' matches 'criteria' exactly or is more specific than
 * 'criteria', false otherwise.
 *
 * 'rule' matches when, for every field, either:
 *
 *   - 'criteria' wildcards the field, or
 *
 *   - 'criteria' and 'rule' specify the same (non-wildcarded) value for it.
 *
 * 'rule' does not match when, for at least one field, either:
 *
 *   - 'criteria' specifies a value for the field but 'rule' wildcards it, or
 *
 *   - 'criteria' and 'rule' specify different values for the field.
 *
 * As a per-field truth table:
 *
 *                                     rule
 *
 *                   c         wildcard    exact
 *                   r        +---------+---------+
 *                   i   wild |   yes   |   yes   |
 *                   t   card |         |         |
 *                   e        +---------+---------+
 *                   r  exact |    no   |if values|
 *                   i        |         |are equal|
 *                   a        +---------+---------+
 *
 * OpenFlow 1.0 non-strict OFPT_FLOW_MOD commands and OpenFlow 1.0 aggregate
 * and flow stats use this matching rule.
 *
 * rule->priority is ignored. */
bool
cls_rule_is_loose_match(const struct cls_rule *rule,
                        const struct minimatch *criteria)
{
    /* First compare the masks alone. */
    if (minimask_has_extra(rule->match.mask, criteria->mask)) {
        return false;
    }

    /* Then compare the values under the mask of 'criteria'. */
    return miniflow_equal_in_minimask(rule->match.flow, criteria->flow,
                                      criteria->mask);
}

/* Iteration. */

/* Returns true if 'rule' is visible in 'version' and, when 'target' is
 * nonnull, also equals 'target''s flow within 'target''s mask. */
static bool
rule_matches(const struct cls_rule *rule, const struct cls_rule *target,
             ovs_version_t version)
{
    /* A rule that is not visible in 'version' never matches. */
    if (!cls_rule_visible_in_version(rule, version)) {
        return false;
    }

    /* Without a target, every visible rule matches. */
    if (!target) {
        return true;
    }

    return miniflow_equal_in_minimask(rule->match.flow, target->match.flow,
                                      target->match.mask);
}

/* Returns the first rule on 'subtable''s rule list that matches 'cursor''s
 * target and version, or a null pointer if there is none.
 *
 * When the cursor has a target, the subtable is skipped without looking at
 * its rules if minimask_has_extra() reports true for the subtable's mask
 * against the target's mask. */
static const struct cls_rule *
search_subtable(const struct cls_subtable *subtable,
                struct cls_cursor *cursor)
{
    const struct cls_rule *candidate;

    if (cursor->target
        && minimask_has_extra(&subtable->mask, cursor->target->match.mask)) {
        return NULL;
    }

    RCULIST_FOR_EACH (candidate, node, &subtable->rules_list) {
        if (rule_matches(candidate, cursor->target, cursor->version)) {
            return candidate;
        }
    }
    return NULL;
}

/* Creates and returns a cursor for iterating through rules in 'cls',
 * positioned on the first matching rule (or with a null 'rule' member if no
 * rule matches).
 *
 *     - With a null 'target', or a 'target' that is a catchall, the cursor
 *       visits every rule in 'cls' that is visible in 'version'.
 *
 *     - With any other 'target', the cursor visits each 'rule' in 'cls' for
 *       which cls_rule_is_loose_match(rule, target) returns true and that is
 *       visible in 'version'.
 *
 * target->priority is ignored. */
struct cls_cursor
cls_cursor_start(const struct classifier *cls, const struct cls_rule *target,
                 ovs_version_t version)
{
    struct cls_cursor cursor;
    struct cls_subtable *subtable;

    cursor.rule = NULL;
    cursor.version = version;
    cursor.cls = cls;

    /* A catchall target filters nothing, so treat it like no target. */
    if (target && !cls_rule_is_catchall(target)) {
        cursor.target = target;
    } else {
        cursor.target = NULL;
    }

    /* Position the cursor on the first matching rule, if any. */
    PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables,
                             &cursor.cls->subtables) {
        const struct cls_rule *first = search_subtable(subtable, &cursor);

        if (!first) {
            continue;
        }
        cursor.subtable = subtable;
        cursor.rule = first;
        break;
    }

    return cursor;
}

/* Returns the rule that follows 'cursor->rule' in 'cursor''s iteration, or a
 * null pointer when the iteration is exhausted.  Updates 'cursor->subtable'
 * (and the subtable cursor) when the next rule is in a later subtable, but
 * leaves 'cursor->rule' for the caller to update. */
static const struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor)
{
    const struct cls_subtable *table = cursor->subtable;
    const struct cls_rule *candidate = cursor->rule;

    /* First, the remainder of the current subtable. */
    RCULIST_FOR_EACH_CONTINUE (candidate, node, &table->rules_list) {
        if (rule_matches(candidate, cursor->target, cursor->version)) {
            return candidate;
        }
    }

    /* Then each of the following subtables. */
    PVECTOR_CURSOR_FOR_EACH_CONTINUE (table, &cursor->subtables) {
        candidate = search_subtable(table, cursor);
        if (!candidate) {
            continue;
        }
        cursor->subtable = table;
        return candidate;
    }

    return NULL;
}

/* Moves 'cursor' forward: 'cursor->rule' becomes the next matching cls_rule
 * in the iteration, or null once every matching rule has been visited. */
void
cls_cursor_advance(struct cls_cursor *cursor)
{
    const struct cls_rule *following = cls_cursor_next(cursor);

    cursor->rule = following;
}

/* Returns the subtable in 'cls' whose mask equals 'mask', or a null pointer
 * if 'cls' has no such subtable. */
static struct cls_subtable *
find_subtable(const struct classifier *cls, const struct minimask *mask)
{
    const uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *candidate;

    CMAP_FOR_EACH_WITH_HASH (candidate, cmap_node, hash,
                             &cls->subtables_map) {
        if (!minimask_equal(mask, &candidate->mask)) {
            /* Hash collision. */
            continue;
        }
        return candidate;
    }
    return NULL;
}

/* Initializes 'map' with a subset of 'miniflow''s maps that includes only the
 * portions with u64-offset 'i' such that 'start' <= i < 'end'.  Does not copy
 * any data from 'miniflow' to 'map'. */
static struct flowmap
miniflow_get_map_in_range(const struct miniflow *miniflow, uint8_t start,
                          uint8_t end)
{
    struct flowmap map;
    size_t ofs = 0;

    map = miniflow->map;

    /* Clear the bits before 'start'. */
    while (start >= MAP_T_BITS) {
        start -= MAP_T_BITS;
        ofs += MAP_T_BITS;
        map.bits[start / MAP_T_BITS] = 0;
    }
    if (start > 0) {
        flowmap_clear(&map, ofs, start);
    }

    /* Clear the bits starting at 'end'. */
    if (end < FLOW_U64S) {
        /* flowmap_clear() can handle at most MAP_T_BITS at a time. */
        ovs_assert(FLOW_U64S - end <= MAP_T_BITS);
        flowmap_clear(&map, end, FLOW_U64S - end);
    }
    return map;
}

/* Allocates a subtable for 'mask', initializes it, and inserts it into
 * 'cls->subtables_map'.  Returns the new subtable.
 *
 * Initialization covers the staged-lookup indices (one per flow segment of
 * 'cls' in which 'mask' has any bits), the per-trie prefix lengths, the ports
 * trie and its mask length, and the (empty) rule containers.
 *
 * The new subtable will be visible to the readers only after this. */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flowmap stage_map;
    uint8_t prev;
    size_t count = miniflow_n_values(&mask->masks);

    /* The mask's values are stored inline, right after the subtable. */
    subtable = xzalloc(sizeof *subtable + MINIFLOW_VALUES_SIZE(count));
    cmap_init(&subtable->rules);
    miniflow_clone(CONST_CAST(struct miniflow *, &subtable->mask.masks),
                   &mask->masks, count);

    /* Init indices for segmented lookup, if any. */
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        stage_map = miniflow_get_map_in_range(&mask->masks, prev,
                                              cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flowmap_is_empty(stage_map)) {
            ccmap_init(&subtable->indices[index]);
            *CONST_CAST(struct flowmap *, &subtable->index_maps[index])
                = stage_map;
            index++;
        }
        prev = cls->flow_segments[i];
    }
    /* Map for the final stage. */
    *CONST_CAST(struct flowmap *, &subtable->index_maps[index])
        = miniflow_get_map_in_range(&mask->masks, prev, FLOW_U64S);
    /* Check if the final stage adds any bits. */
    if (index > 0) {
        if (flowmap_is_empty(subtable->index_maps[index])) {
            /* Remove the last index, as it has the same fields as the rules
             * map. */
            --index;
            ccmap_destroy(&subtable->indices[index]);
        }
    }
    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;

    /* Prefix length of 'mask' on each trie field. */
    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    /* Ports trie. */
    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
    *CONST_CAST(int *, &subtable->ports_mask_len)
        = 32 - ctz32(ntohl(miniflow_get_ports(&mask->masks)));

    /* List of rules. */
    rculist_init(&subtable->rules_list);

    /* Last step: everything above must be set up before this insertion. */
    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);

    return subtable;
}

/* Unlinks 'subtable' from 'cls' and tears it down.  The subtable must
 * already be empty: no ports trie, no rules in its map or on its list.
 *
 * The memory itself is released through ovsrcu_postpone() rather than
 * immediately, because RCU readers may still access the subtable before it is
 * actually freed. */
static void
destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
{
    /* Take the subtable out of both containers of 'cls'. */
    pvector_remove(&cls->subtables, subtable);
    const uint32_t hash = minimask_hash(&subtable->mask, 0);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node, hash);

    /* Nothing may be left inside the subtable. */
    ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
               == NULL);
    ovs_assert(cmap_is_empty(&subtable->rules));
    ovs_assert(rculist_is_empty(&subtable->rules_list));

    for (int idx = 0; idx < subtable->n_indices; idx++) {
        ccmap_destroy(&subtable->indices[idx]);
    }
    cmap_destroy(&subtable->rules);

    ovsrcu_postpone(free, subtable);
}

static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs);

/* Return 'true' if can skip rest of the subtable based on the prefix trie
 * lookup results.
 *
 * 'trie_ctx' holds one lookup context for each of the 'n_tries' tries; a
 * context's trie lookup is performed here the first time it is needed and its
 * result is cached in the context.  'field_plen[j]' is the subtable's prefix
 * length on the field of trie 'j' (0 if the subtable does not use the field).
 * 'range_map' is the set of fields in the stage being checked; tries whose
 * field is outside of it are ignored.
 *
 * When returning true, the trie's 'maskbits' prefix bits of the field are
 * un-wildcarded in 'wc' (either by this function or already beforehand). */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct flowmap range_map, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable, and
           is the trie field within the current range of fields? */
        if (field_plen[j] &&
            flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) {
            struct trie_ctx *ctx = &trie_ctx[j];

            /* On-demand trie lookup. */
            if (!ctx->lookup_done) {
                memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
                ctx->maskbits = trie_lookup(ctx->trie, flow, &ctx->match_plens);
                ctx->lookup_done = true;
            }
            /* Possible to skip the rest of the subtable if subtable's
             * prefix on the field is not included in the lookup result. */
            if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
                /* We want the trie lookup to never result in unwildcarding
                 * any bits that would not be unwildcarded otherwise.
                 * Since the trie is shared by the whole classifier, it is
                 * possible that the 'maskbits' contain bits that are
                 * irrelevant for the partition relevant for the current
                 * packet.  Hence the checks below. */

                /* Check that the trie result will not unwildcard more bits
                 * than this subtable would otherwise. */
                if (ctx->maskbits <= field_plen[j]) {
                    /* Unwildcard the bits and skip the rest. */
                    mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
                    /* Note: Prerequisite already unwildcarded, as the only
                     * prerequisite of the supported trie lookup fields is
                     * the ethertype, which is always unwildcarded. */
                    return true;
                }
                /* Can skip if the field is already unwildcarded. */
                if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
                    return true;
                }
            }
        }
    }
    return false;
}

/* Returns true if 'target' satisfies 'flow'/'mask', that is, if 'target' has
 * the same value as 'flow' in every bit that is set in 'mask'.  Returns false
 * otherwise.
 *
 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
 * target, mask), but it is faster because it relies on the invariant that
 * flow->map and mask->masks.map are the same: the values of 'flow' and of
 * 'mask' are walked in lockstep, driven by the map of 'mask' alone. */
static inline bool
miniflow_and_mask_matches_flow(const struct miniflow *flow,
                               const struct minimask *mask,
                               const struct flow *target)
{
    const uint64_t *rule_values = miniflow_get_values(flow);
    const uint64_t *mask_values = miniflow_get_values(&mask->masks);
    /* 'target' viewed as an array of u64s; advanced by one map unit's worth
     * of u64s after each unit of the map. */
    const uint64_t *target_base = (const uint64_t *)target;
    map_t unit_bits;

    FLOWMAP_FOR_EACH_MAP (unit_bits, mask->masks.map) {
        size_t idx;

        MAP_FOR_EACH_INDEX (idx, unit_bits) {
            const uint64_t mismatch = ((*rule_values++ ^ target_base[idx])
                                       & *mask_values++);

            if (mismatch) {
                return false;
            }
        }
        target_base += MAP_T_BITS;
    }
    return true;
}

/* Looks in 'subtable' for a rule that matches 'flow', considering only the
 * entries stored under 'hash'.  For the first entry that matches 'flow' and
 * has a rule visible in 'version', returns the first such visible rule on
 * that entry's list, which is the highest-priority one.  Returns a null
 * pointer if there is none. */
static inline const struct cls_match *
find_match(const struct cls_subtable *subtable, ovs_version_t version,
           const struct flow *flow, uint32_t hash)
{
    const struct cls_match *head;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (!OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,
                                                       &subtable->mask,
                                                       flow))) {
            /* Same hash, different flow. */
            continue;
        }

        /* First visible rule on the list wins. */
        const struct cls_match *candidate;
        CLS_MATCH_FOR_EACH (candidate, head) {
            if (OVS_LIKELY(cls_match_visible_in_version(candidate,
                                                        version))) {
                return candidate;
            }
        }
    }

    return NULL;
}

/* Looks in 'subtable' for a rule that matches 'flow' and is visible in
 * 'version', and returns it, or a null pointer if there is none.
 *
 * If 'wc' is null, this is a single hash lookup over the whole subtable mask.
 *
 * Otherwise the lookup is staged so that as few bits as possible are
 * un-wildcarded in 'wc' when there is no match: each stage first consults the
 * prefix tries through check_tries() ('trie_ctx' holds the 'n_tries' lookup
 * contexts) and then the stage's hash index.  A miss at any stage
 * un-wildcards only the fields of the stages hashed so far.  A miss in the
 * final stage of a subtable that matches on ports additionally un-wildcards
 * only the port bits that the ports trie lookup reports.  Otherwise, every
 * field of the subtable mask is un-wildcarded. */
static const struct cls_match *
find_match_wc(const struct cls_subtable *subtable, ovs_version_t version,
              const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
              unsigned int n_tries, struct flow_wildcards *wc)
{
    if (OVS_UNLIKELY(!wc)) {
        return find_match(subtable, version, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    uint32_t basis = 0, hash;
    const struct cls_match *rule = NULL;
    struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER;
    unsigned int mask_offset = 0;
    int i;

    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
                        subtable->index_maps[i], flow, wc)) {
            /* 'wc' bits for the trie field set, now unwildcard the preceding
             * bits used so far. */
            goto no_match;
        }

        /* Accumulate the map used so far. */
        stages_map = flowmap_or(stages_map, subtable->index_maps[i]);

        /* 'mask_offset' and 'basis' carry the hashing state from one stage
         * to the next. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask,
                                           subtable->index_maps[i],
                                           &mask_offset, &basis);

        if (!ccmap_find(&subtable->indices[i], hash)) {
            goto no_match;
        }
    }
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
                    subtable->index_maps[i], flow, wc)) {
        goto no_match;
    }
    hash = flow_hash_in_minimask_range(flow, &subtable->mask,
                                       subtable->index_maps[i],
                                       &mask_offset, &basis);
    rule = find_match(subtable, version, flow, hash);
    if (!rule && subtable->ports_mask_len) {
        /* The final stage had ports, but there was no match.  Instead of
         * unwildcarding all the ports bits, use the ports trie to figure out a
         * smaller set of bits to unwildcard. */
        unsigned int mbits;
        ovs_be32 value, plens, mask;

        mask = miniflow_get_ports(&subtable->mask.masks);
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        goto no_match;
    }

    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;

no_match:
    /* Unwildcard the bits in stages so far, as they were used in determining
     * there is no match. */
    flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map);
    return NULL;
}

/* Returns the entry stored in 'subtable' under 'hash' whose flow equals
 * 'flow', or a null pointer if there is none. */
static struct cls_match *
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
           uint32_t hash)
{
    struct cls_match *candidate;

    CMAP_FOR_EACH_WITH_HASH (candidate, cmap_node, hash, &subtable->rules) {
        if (!miniflow_equal(&candidate->flow, flow)) {
            continue;
        }
        return candidate;
    }
    return NULL;
}

/* A longest-prefix match tree. */

/* Returns at least 'plen' bits of the prefix 'pr', beginning at bit offset
 * 'ofs', left-aligned in the result.  Any bits beyond the first 'plen' may
 * hold unwanted data.
 *
 * Prefixes are in network byte order, and offset 0 is the most significant
 * bit of the first byte, so 'ofs' reads as "how many bits to skip from the
 * start of the prefix starting at 'pr'". */
static uint32_t
raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    const ovs_be32 *word = &pr[ofs / 32];   /* Word holding the first bit. */
    const unsigned int skip = ofs % 32;     /* Bits to drop from that word. */
    const unsigned int avail = 32 - skip;   /* Bits that word supplies. */
    uint32_t bits = ntohl(*word) << skip;

    if (plen > avail) {
        /* Not enough yet: top up from the following word. */
        bits |= ntohl(word[1]) >> avail;
    }

    /* The tail may contain unwanted bits. */
    return bits;
}

/* Returns the first min(TRIE_PREFIX_BITS, 'plen') bits of 'pr' found at bit
 * offset 'ofs', left-aligned in a 32-bit word with every remaining bit
 * cleared.  'pr' is in network byte order and offset 0 is the most
 * significant bit of its first byte.  Returns 0 when 'plen' is 0. */
static uint32_t
trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    /* A single trie node never holds more than TRIE_PREFIX_BITS bits. */
    unsigned int len = plen > TRIE_PREFIX_BITS ? TRIE_PREFIX_BITS : plen;

    if (len == 0) {
        return 0;
    }
    /* Keep only the 'len' most significant bits of the raw window. */
    return raw_get_prefix(pr, ofs, len) & (~0u << (32 - len));
}

/* Counts how many of the 'n_bits' most significant bits of 'prefix' agree
 * with the bits of 'value' starting at "MSB 0"-based offset 'ofs', stopping
 * at the first difference.  The result never exceeds 'n_bits'. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
                  unsigned int ofs)
{
    uint32_t candidate = raw_get_prefix(value, ofs, n_bits);
    /* Differing bits, moved to the upper half of a 64-bit word. */
    uint64_t differing = (uint64_t) (prefix ^ candidate) << 32;
    /* A set bit right after the relevant bits caps the count at 'n_bits'. */
    uint64_t sentinel = UINT64_C(1) << (63 - n_bits);

    return raw_clz64(differing | sentinel);
}

/* Counts the leading bits that 'node''s prefix shares with 'prefix', whose
 * total length is 'plen' bits, when compared from "MSB 0"-based offset 'ofs'.
 * The comparison covers at most 'node->n_bits' bits and never runs past the
 * end of 'prefix'. */
static unsigned int
trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
                       unsigned int ofs, unsigned int plen)
{
    unsigned int remaining = plen - ofs; /* Bits of 'prefix' left to match. */
    unsigned int n_bits = MIN(node->n_bits, remaining);

    return prefix_equal_bits(node->prefix, n_bits, prefix, ofs);
}

/* Returns the bit (0 or 1) found at "MSB 0"-based offset 'ofs' in 'value',
 * which is addressed byte by byte so 'ofs' may be greater than 31. */
static unsigned int
be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
{
    const uint8_t *bytes = (const uint8_t *) value;
    unsigned int shift = 7 - ofs % 8; /* Bit 0 is the MSB of its byte. */

    return (bytes[ofs / 8] >> shift) & 1u;
}

/* Returns the bit (0 or 1) of 'prefix' at "MSB 0"-based offset 'ofs', i.e.
 * offset 0 is the most significant bit.  'ofs' must be between 0 and 31,
 * inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    uint32_t probe = 0x80000000u >> ofs; /* Single bit at position 'ofs'. */

    return (prefix & probe) != 0;
}

/* Creates and returns a new, unlinked branch that represents the 'plen' bits
 * of 'prefix' starting at bit offset 'ofs'.
 *
 * A prefix of at most TRIE_PREFIX_BITS bits fits in one node, which gets
 * 'n_rules' as its rule count and no children.  A longer prefix becomes a
 * chain: this node holds the first TRIE_PREFIX_BITS bits with a rule count of
 * zero, and the remainder is built recursively and hung off the edge selected
 * by the remainder's first bit, so that 'n_rules' ends up on the last node of
 * the chain. */
static struct trie_node *
trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
                   unsigned int n_rules)
{
    struct trie_node *node = xmalloc(sizeof *node);

    node->prefix = trie_get_prefix(prefix, ofs, plen);

    if (plen > TRIE_PREFIX_BITS) {
        /* Too long for one node: build the rest of the chain first. */
        struct trie_node *tail = trie_branch_create(prefix,
                                                    ofs + TRIE_PREFIX_BITS,
                                                    plen - TRIE_PREFIX_BITS,
                                                    n_rules);
        int first_bit = get_bit_at(tail->prefix, 0);

        node->n_bits = TRIE_PREFIX_BITS;
        ovsrcu_set_hidden(&node->edges[first_bit], tail);
        ovsrcu_set_hidden(&node->edges[!first_bit], NULL);
        node->n_rules = 0;
        return node;
    }

    /* The whole prefix fits here, so this is a childless node. */
    node->n_bits = plen;
    ovsrcu_set_hidden(&node->edges[0], NULL);
    ovsrcu_set_hidden(&node->edges[1], NULL);
    node->n_rules = n_rules;
    return node;
}

/* Schedules 'node' to be released with free() through ovsrcu_postpone()
 * rather than freeing it immediately. */
static void
trie_node_destroy(const struct trie_node *node)
{
    struct trie_node *victim = CONST_CAST(struct trie_node *, node);

    ovsrcu_postpone(free, victim);
}

/* Copy a trie node for modification and postpone delete the old one. */
static struct trie_node *
trie_node_rcu_realloc(const struct trie_node *node)
{
    struct trie_node *new_node = xmalloc(sizeof *node);

    *new_node = *node;
    trie_node_destroy(node);

    return new_node;
}

/* Tears down the whole subtree reachable through '*trie': the pointer is
 * reset to NULL, both child subtrees are destroyed recursively, and the node
 * itself is handed to trie_node_destroy().  Does nothing if '*trie' is
 * already NULL. */
static void
trie_destroy(rcu_trie_ptr *trie)
{
    struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);

    if (!node) {
        return;
    }

    ovsrcu_set_hidden(trie, NULL);
    for (size_t side = 0; side < 2; side++) {
        trie_destroy(&node->edges[side]);
    }
    trie_node_destroy(node);
}

/* Returns true if 'trie' has no child on either edge. */
static bool
trie_is_leaf(const struct trie_node *trie)
{
    if (ovsrcu_get(struct trie_node *, &trie->edges[0])) {
        return false; /* Has a child on the '0' edge. */
    }
    return !ovsrcu_get(struct trie_node *, &trie->edges[1]);
}

/* Sets the first 'n_bits' bits of the field that starts at 32-bit word offset
 * 'be32ofs' within 'wc->masks'.  Every fully covered word is overwritten with
 * all-ones; a trailing partial word has only its leading 'n_bits' % 32 bits
 * OR-ed in, leaving its other bits as they were. */
static void
mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *words = (ovs_be32 *) &wc->masks + be32ofs;
    unsigned int full_words = n_bits / 32;
    unsigned int tail_bits = n_bits % 32;

    for (unsigned int i = 0; i < full_words; i++) {
        words[i] = OVS_BE32_MAX;
    }
    if (tail_bits) {
        words[full_words] |= htonl(~0u << (32 - tail_bits));
    }
}

static bool
mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;
    ovs_be32 zeroes = 0;

    for (i = 0; i < n_bits / 32; i++) {
        zeroes |= ~mask[i];
    }
    if (n_bits % 32) {
        zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
    }

    return !zeroes; /* All 'n_bits' bits set. */
}

/* Returns the address of the edge of 'node' selected by the bit of 'value' at
 * "MSB 0"-based offset 'ofs': edge 0 for a clear bit, edge 1 for a set bit. */
static rcu_trie_ptr *
trie_next_edge(struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    unsigned int branch = be_get_bit_at(value, ofs);

    return &node->edges[branch];
}

/* Returns the child of 'node' on the edge selected by the bit of 'value' at
 * "MSB 0"-based offset 'ofs', or NULL if that edge has no child. */
static const struct trie_node *
trie_next_node(const struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    unsigned int branch = be_get_bit_at(value, ofs);

    return ovsrcu_get(struct trie_node *, &node->edges[branch]);
}

/* Sets the bit at "MSB 0"-based offset 'ofs' in 'value', leaving every other
 * bit untouched.  'value' is addressed byte by byte, so 'ofs' may be greater
 * than 31. */
static void
be_set_bit_at(ovs_be32 value[], unsigned int ofs)
{
    uint8_t *bytes = (uint8_t *) value;

    bytes[ofs / 8] |= 1u << (7 - ofs % 8);
}

/* Looks up the 'n_bits'-bit 'value' in 'trie'.
 *
 * Returns the number of bits in the prefix mask necessary to determine a
 * mismatch, in case there are longer prefixes in the tree below the one that
 * matched.
 *
 * '*plens' will have a bit set for each prefix length that may have matching
 * rules.  Bits are only ever set here, never cleared, so the caller is
 * responsible for clearing '*plens' prior to calling this. */
static unsigned int
trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
                  ovs_be32 plens[], unsigned int n_bits)
{
    const struct trie_node *prev = NULL;
    const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
    unsigned int match_len = 0; /* Number of matching bits. */

    while (node) {
        /* Check if this edge can be followed. */
        unsigned int eqbits = prefix_equal_bits(node->prefix, node->n_bits,
                                                value, match_len);

        match_len += eqbits;
        if (eqbits < node->n_bits) {
            /* Mismatch, nothing more to be found.  The bit at offset
             * 'match_len' differed, so it is included in the result. */
            return match_len + 1;
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            be_set_bit_at(plens, match_len - 1);
        }
        if (match_len >= n_bits) {
            return n_bits; /* Full prefix. */
        }

        prev = node;
        node = trie_next_node(node, value, match_len);
    }

    /* node == NULL.  Full match so far, but we tried to follow a
     * non-existing branch.  Need to exclude the other branch if it exists
     * (it does not if we were called on an empty trie or 'prev' is a leaf
     * node). */
    if (!prev || trie_is_leaf(prev)) {
        return match_len;
    }
    return match_len + 1;
}

static unsigned int
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
            union trie_prefix *plens)
{
    const struct mf_field *mf = trie->field;

    /* Check that current flow matches the prerequisites for the trie
     * field.  Some match fields are used for multiple purposes, so we
     * must check that the trie is relevant for this flow. */
    if (mf_are_prereqs_ok(mf, flow, NULL)) {
        return trie_lookup_value(&trie->root,
                                 &((ovs_be32 *)flow)[mf->flow_be32ofs],
                                 &plens->be32, mf->n_bits);
    }
    memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
    return 0; /* Value not used in this case. */
}

/* Returns the length, in bits, of the prefix match mask that 'minimask' holds
 * for the field 'mf', examining the field one 32-bit word at a time.
 *
 * Returns 0 if the mask is not a prefix mask, that is, if a word has a zero
 * bit followed by a one bit, or if any bit is set in a word that follows the
 * word in which the mask ended. */
static unsigned int
minimask_get_prefix_len(const struct minimask *minimask,
                        const struct mf_field *mf)
{
    uint8_t first = mf->flow_be32ofs;
    uint8_t end = first + mf->n_bytes / 4;
    unsigned int prefix_len = 0;
    bool mask_ended = false; /* True once a word with trailing zeros seen. */

    for (uint8_t word = first; word < end; word++) {
        uint32_t mask = ntohl(minimask_get_be32(minimask, word));

        if (mask_ended) {
            if (mask) {
                return 0; /* No bits allowed after mask ended. */
            }
            continue;
        }

        /* The complement of a contiguous prefix mask is a run of low-order
         * ones, and adding one to such a run clears every bit of it. */
        uint32_t holes = ~mask;
        if (holes & (holes + 1)) {
            return 0; /* Mask not contiguous. */
        }

        unsigned int trailing_zeros = ctz32(mask);
        prefix_len += 32 - trailing_zeros;
        mask_ended = trailing_zeros != 0;
    }

    return prefix_len;
}

/*
 * This is called only when mask prefix is known to be CIDR and non-zero.
 * Relies on the fact that the flow and mask have the same map, and since
 * the mask is CIDR, the storage for the flow field exists even if it
 * happened to be zeros.
 */
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
    size_t u64_ofs = mf->flow_be32ofs / 2;

    return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
        + (mf->flow_be32ofs & 1);
}

/* Insert rule in to the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_insert_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}

/* Adds one rule for the first 'mlen' bits of 'prefix' to the tree rooted at
 * '*edge'.
 *
 * The tree is walked from '*edge' downwards, consuming bits of 'prefix' as
 * they match each node.  One of three things then happens:
 *
 *   - A node's prefix differs from 'prefix' part way through.  That node is
 *     split: a new parent holding the bits that did match takes its place,
 *     with a copy of the old node (shortened by those bits) below it and, if
 *     'prefix' has bits left over, a new branch for them on the other edge.
 *
 *   - 'prefix' ends exactly at an existing node, whose rule count is then
 *     incremented in place.
 *
 *   - The walk reaches an empty edge, where a new branch holding the
 *     remaining bits is attached. */
static void
trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    int ofs = 0; /* Number of bits of 'prefix' matched so far. */

    /* Walk the tree. */
    for (; (node = ovsrcu_get_protected(struct trie_node *, edge));
         edge = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        ofs += eqbits;
        if (eqbits < node->n_bits) {
            /* Mismatch, new node needs to be inserted above. */
            /* The first bit of the old node that did not match decides which
             * edge of the new parent the old node goes under. */
            int old_branch = get_bit_at(node->prefix, eqbits);
            struct trie_node *new_parent;

            /* The new parent covers the 'eqbits' bits that matched.  It
             * carries the new rule only if 'prefix' ends right here. */
            new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits,
                                            ofs == mlen ? 1 : 0);
            /* Copy the node to modify it. */
            node = trie_node_rcu_realloc(node);
            /* Adjust the new node for its new position in the tree. */
            node->prefix <<= eqbits;
            node->n_bits -= eqbits;
            ovsrcu_set_hidden(&new_parent->edges[old_branch], node);

            /* Check if need a new branch for the new rule. */
            if (ofs < mlen) {
                ovsrcu_set_hidden(&new_parent->edges[!old_branch],
                                  trie_branch_create(prefix, ofs, mlen - ofs,
                                                     1));
            }
            /* Only this store links the new parent, and everything below
             * it, into the tree. */
            ovsrcu_set(edge, new_parent); /* Publish changes. */
            return;
        }
        /* Full match so far. */

        if (ofs == mlen) {
            /* Full match at the current node, rule needs to be added here. */
            node->n_rules++;
            return;
        }
    }
    /* Must insert a new tree branch for the new rule. */
    ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1));
}

/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_remove_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}

/* Removes one rule for the first 'mlen' bits of 'prefix' from the tree rooted
 * at '*root', then prunes nodes that the removal made redundant.
 *
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'.
 *
 * The edges followed during the walk are recorded in 'edges' so that pruning
 * can move back up towards the root.  If the prefix is not found in the tree,
 * or the node found for it has no rules, the function returns without
 * changing anything. */
static void
trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    /* Path from the root to the current node; one slot for every bit of a
     * 'union trie_prefix'. */
    rcu_trie_ptr *edges[sizeof(union trie_prefix) * CHAR_BIT];
    int depth = 0, ofs = 0;

    /* Walk the tree. */
    for (edges[0] = root;
         (node = ovsrcu_get_protected(struct trie_node *, edges[depth]));
         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);

        if (eqbits < node->n_bits) {
            /* Mismatch, nothing to be removed.  This should never happen, as
             * only rules in the classifier are ever removed. */
            break; /* TODO: log a warning; currently silent. */
        }
        /* Full match so far. */
        ofs += eqbits;

        if (ofs == mlen) {
            /* Full prefix match at the current node, remove rule here. */
            if (!node->n_rules) {
                break; /* TODO: log a warning; currently silent. */
            }
            node->n_rules--;

            /* Check if can prune the tree. */
            /* Each iteration drops one node that has no rules and at most one
             * child, then moves on to its parent if the node had no child. */
            while (!node->n_rules) {
                struct trie_node *next,
                    *edge0 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[0]),
                    *edge1 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[1]);

                if (edge0 && edge1) {
                    break; /* A branching point, cannot prune. */
                }

                /* Else have at most one child node, remove this node. */
                next = edge0 ? edge0 : edge1;

                if (next) {
                    /* A single node holds at most TRIE_PREFIX_BITS bits. */
                    if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
                        break;   /* Cannot combine. */
                    }
                    next = trie_node_rcu_realloc(next); /* Modify. */

                    /* Combine node with next. */
                    /* The child's bits are appended after this node's bits. */
                    next->prefix = node->prefix | next->prefix >> node->n_bits;
                    next->n_bits += node->n_bits;
                }
                /* Update the parent's edge. */
                /* 'next' is either the merged copy of the child or NULL. */
                ovsrcu_set(edges[depth], next); /* Publish changes. */
                trie_node_destroy(node);

                if (next || !depth) {
                    /* Branch not pruned or at root, nothing more to do. */
                    break;
                }
                /* The node was removed outright, so its parent lost a child
                 * and may now be prunable as well. */
                node = ovsrcu_get_protected(struct trie_node *,
                                            edges[--depth]);
            }
            return;
        }
    }
    /* Cannot go deeper. This should never happen, since only rules
     * that actually exist in the classifier are ever removed. */
}


/* Distinctive invalid pointer value: the nibble 0xb repeated across the whole
 * width of a pointer.  The expansion is fully parenthesized so that it stays
 * a single operand wherever the macro is used. */
#define CLS_MATCH_POISON ((struct cls_match *)(UINTPTR_MAX / 0xf * 0xb))

/* Frees 'rule'.  Its 'next' link is overwritten with CLS_MATCH_POISON first,
 * presumably so that a stale traversal through the freed rule stands out
 * (NOTE(review): confirm against the readers of 'next'). */
void
cls_match_free_cb(struct cls_match *rule)
{
    ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON);
    free(rule);
}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/*
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								 * Copyright (c) 2009-2017 Nicira, Inc.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 */
 								#include <config.h>
 								#include "classifier.h"
-												lib/classifier: Add lib/classifier-private.h.

tests/test-classifier.c used to include lib/classifier.c to gain
access to the internal data structures and some utility functions.
This was confusing, so this patch splits the relevant groups of
classifier internal definitions to a new file
(lib/classifier-private.h), which is included by both lib/classifier.c
and tests/test-classifier.c.  Other use of the new file is
discouraged.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-24 13:22:24 -07:00
+								#include "classifier-private.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include <errno.h>
-												sparse: Add guards to prevent FreeBSD-incompatible #include order.

FreeBSD insists that <sys/types.h> be included before <netinet/in.h> and
that <netinet/in.h> be included before <arpa/inet.h>.  This adds guards to
the "sparse" headers to yield a warning if this order is violated.  This
commit also adjusts the order of many #includes to suit this requirement.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2017-11-06 14:42:32 -08:00
+								#include <sys/types.h>
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include <netinet/in.h>
-												flow: Remove flow_to/from_match() in favor of cls_rule_to/from_match().

The flow_from_match() and flow_to_match() functions have to deal with most
of the state in a cls_rule anyhow, and this will increase in upcoming
commits, to the point that we might as well just use a cls_rule anyhow.
This commit therefore deletes flow_from_match() and flow_to_match(),
integrating their code into cls_rule_from_match() and the new function
cls_rule_to_match(), respectively.  It also changes each of the functions'
callers to use the new cls_rule_*() function.

											
										
										
											2010-11-22 10:10:14 -08:00
+								#include "byte-order.h"
-												Move lib/dynamic-string.h to include/openvswitch directory

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-03 10:20:46 -08:00
+								#include "openvswitch/dynamic-string.h"
-												classifier: Implement better classifier rule formatting.

The old formatting was only good enough for debugging, but now we need to
be able to format cls_rules as part of ofp-print.c.  This new code is
modeled after ofp_match_to_string().

											
										
										
											2010-11-23 12:31:50 -08:00
+								#include "odp-util.h"
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								#include "packets.h"
-												lib/classifier: Clean up includes.

Remove unnecessary includes from lib/classifier.h and add them to
lib/classifier.c as needed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-11 11:07:43 -07:00
+								#include "util.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								struct trie_ctx;
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								/* A collection of "struct cls_conjunction"s currently embedded into a
 								 * cls_match. */
 								struct cls_conjunction_set {
 								    /* Link back to the cls_match.
 								     *
 								     * cls_conjunction_set is mostly used during classifier lookup, and, in
 								     * turn, during classifier lookup the most used member of
 								     * cls_conjunction_set is the rule's priority, so we cache it here for fast
 								     * access. */
 								    struct cls_match *match;
 								    int priority;               /* Cached copy of match->priority. */
 								    /* Conjunction information.
 								     *
 								     * 'min_n_clauses' allows some optimization during classifier lookup. */
 								    unsigned int n;             /* Number of elements in 'conj'. */
 								    unsigned int min_n_clauses; /* Smallest 'n' among elements of 'conj'. */
 								    struct cls_conjunction conj[];
 								};
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								/* Ports trie depends on both ports sharing the same ovs_be32. */
 								#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
 								BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
 								#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)
-												lib/classifier: Hide more of the internal data structures.

It is better not to expose definitions not needed by users.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								static size_t
 								cls_conjunction_set_size(size_t n)
 								{
 								    return (sizeof(struct cls_conjunction_set)
 								            + n * sizeof(struct cls_conjunction));
 								}
 								static struct cls_conjunction_set *
 								cls_conjunction_set_alloc(struct cls_match *match,
 								                          const struct cls_conjunction conj[], size_t n)
 								{
 								    if (n) {
 								        size_t min_n_clauses = conj[0].n_clauses;
 								        for (size_t i = 1; i < n; i++) {
 								            min_n_clauses = MIN(min_n_clauses, conj[i].n_clauses);
 								        }
 								        struct cls_conjunction_set *set = xmalloc(cls_conjunction_set_size(n));
 								        set->match = match;
 								        set->priority = match->priority;
 								        set->n = n;
 								        set->min_n_clauses = min_n_clauses;
 								        memcpy(set->conj, conj, n * sizeof *conj);
 								        return set;
 								    } else {
 								        return NULL;
 								    }
 								}
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								static struct cls_match *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								cls_match_alloc(const struct cls_rule *rule, ovs_version_t version,
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								                const struct cls_conjunction conj[], size_t n)
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								{
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    size_t count = miniflow_n_values(rule->match.flow);
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
 								    struct cls_match *cls_match
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        = xmalloc(sizeof *cls_match + MINIFLOW_VALUES_SIZE(count));
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possiblity of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    ovsrcu_init(&cls_match->next, NULL);
-												classifier: Constify fields.

Some struct cls_match and cls_subtable fields were already documented
of being const.  Make them const and use CONST_CAST where appropriate
to initialize them.

This will help catch future errors modifying those fields after
initialization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-27 10:57:28 -07:00
+								    *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
 								    *CONST_CAST(int *, &cls_match->priority) = rule->priority;
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								    /* Make rule initially invisible. */
 								    cls_match->versions = VERSIONS_INITIALIZER(version, version);
-												flow: Eliminate miniflow_clone() and minimask_clone().

miniflow_clone() and minimask_clone() are no longer used, remove them
from the API.

Now that miniflow data is always inlined, it makes sense to rename
miniflow_clone_inline() miniflow_clone().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    miniflow_clone(CONST_CAST(struct miniflow *, &cls_match->flow),
 								                   rule->match.flow, count);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    ovsrcu_set_hidden(&cls_match->conj_set,
 								                      cls_conjunction_set_alloc(cls_match, conj, n));
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
 								    return cls_match;
 								}
-												lib/classifier: Hide more of the internal data structures.

It is better not to expose definitions not needed by users.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								static struct cls_subtable *find_subtable(const struct classifier *cls,
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								                                          const struct minimask *);
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								static struct cls_subtable *insert_subtable(struct classifier *cls,
-												classifier: Remove internal mutex.

Almost all classifier users already exclude concurrent modifications,
or are single-threaded, hence the classifier internal mutex can be
removed.  Due to this change, ovs-router.c and tnl-ports.c need new
mutexes, which are added.

As noted by Ben in review, ovs_router_flush() should also free the
entries it removes from the classifier.  It now calls
ovsrcu_postpone() to that effect.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 15:58:09 -08:00
+								                                            const struct minimask *);
 								static void destroy_subtable(struct classifier *cls, struct cls_subtable *);
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								static const struct cls_match *find_match_wc(const struct cls_subtable *,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                                             ovs_version_t version,
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								                                             const struct flow *,
 								                                             struct trie_ctx *,
 								                                             unsigned int n_tries,
 								                                             struct flow_wildcards *);
 								static struct cls_match *find_equal(const struct cls_subtable *,
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								                                    const struct miniflow *, uint32_t hash);
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
to its pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								/* Return the next visible (lower-priority) rule in the list.  Multiple
 								 * identical rules with the same priority may exist transitionally, but when
 								 * versioning is used at most one of them is ever visible for lookups on any
 								 * given 'version'. */
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								static inline const struct cls_match *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								next_visible_rule_in_list(const struct cls_match *rule, ovs_version_t version)
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								{
 								    do {
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possiblity of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								        rule = cls_match_next(rule);
-												classifier: Simplify versioning.

After all, there are some cases in which both the insertion version
and removal version of a rule need to be considered.  This makes the
cls_match a bit bigger, but makes classifier versioning much simpler
to understand.

Also, avoid using type larger than int in an enum, as it is not
portable C.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-12 16:12:56 -07:00
+								    } while (rule && !cls_match_visible_in_version(rule, version));
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possiblity of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    return rule;
-												classifier: Use rculist.

The list of identical, but lower priority rules is not currently used
in classifier lookup.  A later patch introducing conjunctive matches
needs to access the list during lookups, so we must make the list RCU.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-31 16:22:23 -07:00
+								}
-												classifier: Do not use mf_value.

mf_value has grown bigger than needed for storing the biggest
supported prefix (IPv6 address length).  Define a new type to be used
instead of mf_value.

This makes classifier lookups a bit faster.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-12 17:03:07 -07:00
+								/* Type with maximum supported prefix length.
 								 *
 								 * 'ipv6' exists only to make the union as large as the biggest supported
 								 * prefix (an IPv6 address); the contents are accessed through 'be32'. */
 								union trie_prefix {
 								    struct in6_addr ipv6;  /* For sizing. */
 								    ovs_be32 be32;         /* For access. */
 								};
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static unsigned int minimask_get_prefix_len(const struct minimask *,
 								                                            const struct mf_field *);
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								static void trie_init(struct classifier *cls, int trie_idx,
-												classifier: Remove internal mutex.

Almost all classifier users already exclude concurrent modifications,
or are single-threaded, hence the classifier internal mutex can be
removed.  Due to this change, ovs-router.c and tnl-ports.c need new
mutexes, which are added.

As noted by Ben in review, ovs_router_flush() should also free the
entries it removes from the classifier.  It now calls
ovsrcu_postpone() to that effect.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 15:58:09 -08:00
+								                      const struct mf_field *);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
-												classifier: Do not use mf_value.

mf_value has grown bigger than needed for storing the biggest
supported prefix (IPv6 address length).  Define a new type to be used
instead of mf_value.

This makes classifier lookups a bit faster.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-12 17:03:07 -07:00
+								                                union trie_prefix *plens);
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								static unsigned int trie_lookup_value(const rcu_trie_ptr *,
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								                                      const ovs_be32 value[], ovs_be32 plens[],
 								                                      unsigned int value_bits);
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								static void trie_destroy(rcu_trie_ptr *);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								                               int mlen);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								                               int mlen);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                                 unsigned int n_bits);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								static bool mask_prefix_bits_set(const struct flow_wildcards *,
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                                 uint8_t be32ofs, unsigned int n_bits);
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
 								/* cls_rule. */
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								static inline void
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								cls_rule_init__(struct cls_rule *rule, unsigned int priority)
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								{
 								    rculist_init(&rule->node);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    *CONST_CAST(int *, &rule->priority) = priority;
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    ovsrcu_init(&rule->cls_match, NULL);
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								}
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								/* Initializes 'rule' to match packets specified by 'match' at the given
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								 * 'priority'.  'match' must satisfy the invariant described in the comment at
 								 * the definition of struct match.
-												nx-match: Implement support for arbitrary VLAN TCI masks.

Since the Nicira Extended Match was specified nicira-ext.h has claimed that
arbitrary masks are allowed, but in fact only certain masks were actually
implemented.  This commit implements general masking for the 802.1Q VLAN
TCI field.

											
										
										
											2010-11-23 10:06:28 -08:00
+								 *
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 								 *
-												classifier: Change type used for priorities from 'unsigned int' to 'int'.

OpenFlow has priorities in the 16-bit unsigned range, from 0 to 65535.
In the classifier, it is sometimes useful to be able to have values below
and above this range.  With the 'unsigned int' type used for priorities
until now, there were no values below the range, so some code worked
around it by converting priorities to 64-bit signed integers.  This didn't
seem so great to me given that a plain 'int' also had the needed range.
This commit therefore changes the type used for priorities to int.

The interesting parts of this change are in pvector.h and classifier.c,
where one can see the elimination of the use of int64_t.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-30 11:40:07 -07:00
+								 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 								 * 0 and UINT16_MAX, inclusive.) */
-												nicira-ext: Support masking of nd_target field

This commit adds support to specify a mask in CIDR format for
the nd_target field.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>

											
										
										
											2012-04-25 15:48:40 -07:00
+								void
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								cls_rule_init(struct cls_rule *rule, const struct match *match, int priority)
-												nicira-ext: Support masking of nd_target field

This commit adds support to specify a mask in CIDR format for
the nd_target field.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>

											
										
										
											2012-04-25 15:48:40 -07:00
+								{
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cls_rule_init__(rule, priority);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match);
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								}
 								/* Same as cls_rule_init() for initialization from a "struct minimatch". */
 								void
 								cls_rule_init_from_minimatch(struct cls_rule *rule,
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								                             const struct minimatch *match, int priority)
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								{
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cls_rule_init__(rule, priority);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match);
-												nicira-ext: Support matching IPv6 Neighbor Discovery messages.

IPv6 uses Neighbor Discovery messages in a similar manner to how IPv4
uses ARP.  This commit adds support for matching deeper into the
payloads of Neighbor Solicitation (NS) and Neighbor Advertisement (NA)
messages.  Currently, the matching fields include:

    - NS and NA Target (nd_target)
    - NS Source Link Layer Address (nd_sll)
    - NA Target Link Layer Address (nd_tll)

When defining IPv6 Neighbor Discovery rules, the Nicira Extensible Match
(NXM) extension to OVS must be used.

Signed-off-by: Justin Pettit <jpettit@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-02-01 22:54:11 -08:00
+								}
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								/* Initializes 'dst' as a copy of 'src'.
 								 *
-												classifier: New function cls_rule_move().

This function will acquire its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-08-27 12:25:48 -07:00
+								 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								void
 								cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
 								{
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cls_rule_init__(dst, src->priority);
 								    minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match);
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								}
-												classifier: New function cls_rule_move().

This function will acquire its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-08-27 12:25:48 -07:00
+								/* Initializes 'dst' with the data in 'src', destroying 'src'.
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 *
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								 * 'src' must be a cls_rule NOT in a classifier.
-												classifier: New function cls_rule_move().

This function will acquire its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-08-27 12:25:48 -07:00
+								 *
 								 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
 								void
 								cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
 								{
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cls_rule_init__(dst, src->priority);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    minimatch_move(CONST_CAST(struct minimatch *, &dst->match),
 								                   CONST_CAST(struct minimatch *, &src->match));
-												classifier: New function cls_rule_move().

This function will acquire its first user in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-08-27 12:25:48 -07:00
+								}
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 								 * normally embedded into a larger structure).
 								 *
 								 * ('rule' must not currently be in a classifier.) */
 								void
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								cls_rule_destroy(struct cls_rule *rule)
-												rculist: Remove postponed poisoning.

Postponed 'next' member poisoning was based on the faulty assumption
that postponed functions would be called in the order they were
postponed.  This assumption holds only for the functions postponed by
any single thread.  When functions are postponed by different
threads, there are no guarantees of the order in which the functions
may be called, or timing between those calls after the next grace
period has passed.

Given this, the postponed poisoning could have executed after
postponed destruction of the object containing the rculist element.

This bug was revealed after the memory leaks on rule deletion were
recently fixed.

This patch removes the postponed 'next' member poisoning and adds
documentation describing the ordering limitations in OVS RCU.

Alex Wang dug out the root cause of the resulting crashes, thanks!

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
											
										
										
											2015-06-11 17:28:37 -07:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								{
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    /* Must not be in a classifier. */
 								    ovs_assert(!get_cls_match_protected(rule));
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
-												rculist: Remove postponed poisoning.

Postponed 'next' member poisoning was based on the faulty assumption
that postponed functions would be called in the order they were
postponed.  This assumption holds only for the functions postponed by
any single thread.  When functions are postponed by different
threads, there are no guarantees of the order in which the functions
may be called, or timing between those calls after the next grace
period has passed.

Given this, the postponed poisoning could have executed after
postponed destruction of the object containing the rculist element.

This bug was revealed after the memory leaks on rule deletion were
recently fixed.

This patch removes the postponed 'next' member poisoning and adds
documentation describing the ordering limitations in OVS RCU.

Alex Wang dug out the root cause of the resulting crashes, thanks!

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
											
										
										
											2015-06-11 17:28:37 -07:00
+								    /* Check that the rule has been properly removed from the classifier. */
 								    ovs_assert(rule->node.prev == RCULIST_POISON
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows making
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								               || rculist_is_empty(&rule->node));
-												rculist: Remove postponed poisoning.

Postponed 'next' member poisoning was based on the faulty assumption
that postponed functions would be called in the order they were
postponed.  This assumption holds only for the functions postponed by
any single thread.  When functions are postponed by different
threads, there are no guarantees of the order in which the functions
may be called, or timing between those calls after the next grace
period has passed.

Given this, the postponed poisoning could have executed after
postponed destruction of the object containing the rculist element.

This bug was revealed after the memory leaks on rule deletion were
recently fixed.

This patch removes the postponed 'next' member poisoning and adds
documentation describing the ordering limitations in OVS RCU.

Alex Wang dug out the root cause of the resulting crashes, thanks!

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>
											
										
										
											2015-06-11 17:28:37 -07:00
+								    rculist_poison__(&rule->node);   /* Poisons also the next pointer. */
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows making
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match));
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								}
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								/* This may only be called by the exclusive writer. */
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								void
 								cls_rule_set_conjunctions(struct cls_rule *cr,
 								                          const struct cls_conjunction *conj, size_t n)
 								{
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    struct cls_match *match = get_cls_match_protected(cr);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    struct cls_conjunction_set *old
 								        = ovsrcu_get_protected(struct cls_conjunction_set *, &match->conj_set);
 								    struct cls_conjunction *old_conj = old ? old->conj : NULL;
 								    unsigned int old_n = old ? old->n : 0;
 								    if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
 								        if (old) {
 								            ovsrcu_postpone(free, old);
 								        }
 								        ovsrcu_set(&match->conj_set,
 								                   cls_conjunction_set_alloc(match, conj, n));
 								    }
 								}
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								/* Returns true if 'a' and 'b' match the same packets at the same priority,
 								 * false if they differ in some way. */
-												classifier: New function cls_rule_equal().

											
										
										
											2010-11-08 16:35:34 -08:00
+								bool
 								cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
 								{
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								    return a->priority == b->priority && minimatch_equal(&a->match, &b->match);
-												classifier: New function cls_rule_equal().

											
										
										
											2010-11-08 16:35:34 -08:00
+								}
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								/* Appends a string describing 'rule' to 's'. */
-												classifier: Implement better classifier rule formatting.

The old formatting was only good enough for debugging, but now we need to
be able to format cls_rules as part of ofp-print.c.  This new code is
modeled after ofp_match_to_string().

											
										
										
											2010-11-23 12:31:50 -08:00
+								void
-												tun-metadata: Manage tunnel TLV mapping table on a per-bridge basis.

When using tunnel TLVs (at the moment, this means Geneve options), a
controller must first map the class and type onto an appropriate OXM
field so that it can be used in OVS flow operations. This table is
managed using OpenFlow extensions.

The original code that added support for TLVs made the mapping table
global as a simplification. However, this is not really logically
correct as the OpenFlow management commands are operating on a per-bridge
basis. This removes the original limitation to make the table per-bridge.

One nice result of this change is that it is generally clearer whether
the tunnel metadata is in datapath or OpenFlow format. Rather than
allowing ad-hoc format changes and trying to handle both formats in the
tunnel metadata functions, the format is more clearly separated by function.
Datapaths (both kernel and userspace) use datapath format and it is not
changed during the upcall process. At the beginning of action translation,
tunnel metadata is converted to OpenFlow format and flows and wildcards
are translated back at the end of the process.

As an additional benefit, this change improves performance in some flow
setup situations by keeping the tunnel metadata in the original packet
format in more cases. This helps when copies need to be made as the amount
of data touched is only what is present in the packet rather than the
maximum amount of metadata supported.

Co-authored-by: Madhu Challa <challa@noironetworks.com>
Signed-off-by: Madhu Challa <challa@noironetworks.com>
Signed-off-by: Jesse Gross <jesse@kernel.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-04-19 18:36:04 -07:00
+								cls_rule_format(const struct cls_rule *rule, const struct tun_table *tun_table,
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								                const struct ofputil_port_map *port_map, struct ds *s)
-												classifier: Implement better classifier rule formatting.

The old formatting was only good enough for debugging, but now we need to
be able to format cls_rules as part of ofp-print.c.  This new code is
modeled after ofp_match_to_string().

											
										
										
											2010-11-23 12:31:50 -08:00
+								{
-												Support accepting and displaying port names in OVS tools.

Until now, most ovs-ofctl commands have not accepted names for ports, only
numbers, and have not been able to display port names either.  It's a lot
easier for users if they can use and see meaningful names instead of
arbitrary numbers.  This commit adds that support.

For backward compatibility, only interactive ovs-ofctl commands by default
display port names; to display them in scripts, use the new --names
option.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2017-05-31 16:06:12 -07:00
+								    minimatch_format(&rule->match, tun_table, port_map, s, rule->priority);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												classifier: Optimize iteration with a catch-all target rule.

When cls_cursor_init() is given a NULL target, it can skip an expensive
step comparing the rule against the target for every table and every rule
in the classifier.  collect_rule_loose() and other callers could take
advantage of this optimization, except that they actually pass in a rule
that matches everything instead of a NULL rule (e.g. for "ovs-ofctl
dump-flows <bridge>" without specifying a matching rule).

This optimizes that case.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-20 14:46:15 -07:00
 								/* Returns true if 'rule' matches every packet, false otherwise. */
 								bool
 								cls_rule_is_catchall(const struct cls_rule *rule)
 								{
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    return minimask_is_catchall(rule->match.mask);
-												classifier: Optimize iteration with a catch-all target rule.

When cls_cursor_init() is given a NULL target, it can skip an expensive
step comparing the rule against the target for every table and every rule
in the classifier.  collect_rule_loose() and other callers could take
advantage of this optimization, except that they actually pass in a rule
that matches everything instead of a NULL rule (e.g. for "ovs-ofctl
dump-flows <bridge>" without specifying a matching rule).

This optimizes that case.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-20 14:46:15 -07:00
+								}
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
-												classifier: Fix comment.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>
											
										
										
											2015-08-12 16:00:48 -07:00
+								/* Makes 'rule' invisible in 'remove_version'.  Once that version is used in
 								 * lookups, the caller should remove 'rule' via ovsrcu_postpone().
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 *
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								 * 'rule' must be in a classifier.
 								 * This may only be called by the exclusive writer. */
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								void
-												classifier: Simplify versioning.

After all, there are some cases in which both the insertion version
and removal version of a rule need to be considered.  This makes the
cls_match a bit bigger, but makes classifier versioning much simpler
to understand.

Also, avoid using a type larger than int in an enum, as it is not
portable C.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-12 16:12:56 -07:00
+								cls_rule_make_invisible_in_version(const struct cls_rule *rule,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                                   ovs_version_t remove_version)
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								{
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    struct cls_match *cls_match = get_cls_match_protected(rule);
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								    ovs_assert(remove_version >= cls_match->versions.add_version);
-												classifier: Simplify versioning.

After all, there are some cases in which both the insertion version
and removal version of a rule need to be considered.  This makes the
cls_match a bit bigger, but makes classifier versioning much simpler
to understand.

Also, avoid using a type larger than int in an enum, as it is not
portable C.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-12 16:12:56 -07:00
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    cls_match_set_remove_version(cls_match, remove_version);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								}
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								/* This undoes the change made by cls_rule_make_invisible_in_version().
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								 *
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								 * 'rule' must be in a classifier.
 								 * This may only be called by the exclusive writer. */
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								void
 								cls_rule_restore_visibility(const struct cls_rule *rule)
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								{
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    cls_match_set_remove_version(get_cls_match_protected(rule),
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                                 OVS_VERSION_NOT_REMOVED);
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								}
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								/* Return true if 'rule' is visible in 'version'.
 								 *
 								 * 'rule' must be in a classifier. */
 								bool
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								cls_rule_visible_in_version(const struct cls_rule *rule, ovs_version_t version)
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								{
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    struct cls_match *cls_match = get_cls_match(rule);
 								    return cls_match && cls_match_visible_in_version(cls_match, version);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
 								/* Initializes 'cls' as a classifier that initially contains no classification
 								 * rules. */
 								void
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								classifier_init(struct classifier *cls, const uint8_t *flow_segments)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
 								    cls->n_rules = 0;
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    cmap_init(&cls->subtables_map);
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    pvector_init(&cls->subtables);
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    cls->n_flow_segments = 0;
 								    if (flow_segments) {
 								        while (cls->n_flow_segments < CLS_MAX_INDICES
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								               && *flow_segments < FLOW_U64S) {
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
 								        }
 								    }
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    cls->n_tries = 0;
-												lib/classifier: Use internal mutex.

Add an internal mutex to struct cls_classifier, and reorganize
classifier internal structures according to the user of each field,
marking the fields that need to be protected by the mutex.  This makes
locking requirements easier to track, and may make lookup more memory
efficient.

After this patch there is some double locking, as callers are taking
the fat-rwlock, and we take the mutex internally.  A following patch
will remove the classifier fat-rwlock, removing the (double) locking
overhead.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    for (int i = 0; i < CLS_MAX_TRIES; i++) {
 								        trie_init(cls, i, NULL);
 								    }
-												classifier: Defer pvector publication.

This patch adds new functions classifier_defer() and
classifier_publish(), which control when the classifier modifications
are made available to lookups.  By default, all modifications are made
available to lookups immediately.  Modifications made after a
classifier_defer() call MAY be 'deferred' for later 'publication'.  A
call to classifier_publish() will both publish any deferred
modifications, and cause subsequent changes to be published
immediately.

Currently any deferring is limited to the visibility of the subtable
vector changes.  pvector now processes modifications mostly in a
working copy, which needs to be explicitly published with
pvector_publish().  pvector_publish() sorts the working copy and
removes gaps before publishing it.

This change helps avoid O(n**2) memory behavior in corner cases,
where a large number of rules with different masks are inserted or
deleted.

VMware-BZ: #1322017
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    cls->publish = true;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
 								/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
-												lib/classifier: Lockless lookups.

Now that all the relevant classifier structures use RCU and internal
mutual exclusion for modifications, we can remove the fat-rwlock and
thus make the classifier lookups lockless.

As the readers are operating concurrently with the writers, a
concurrent reader may or may not see a new rule being added by a
writer, depending on how the concurrent events overlap with each
other.  Overall, this is no different from the former locked behavior,
but there the visibility of the new rule only depended on the timing
of the locking functions.

A new rule is first added to the segment indices, so the readers may
find the rule in the indices before the rule is visible in the
subtable's 'rules' map.  This may result in us losing the opportunity
to quit lookups earlier, resulting in sub-optimal wildcarding.  This
will be fixed by forthcoming revalidation always scheduled after flow
table changes.

Similar behavior may happen due to us removing the overlapping rule
(if any) from the indices only after the corresponding new rule has
been added.

The subtable's max priority is updated only after a rule is inserted
to the maps, so the concurrent readers may not see the rule, as the
updated priority ordered subtable list will only be visible after the
subtable's max priority is updated.

Similarly, the classifier's partitions are updated by the caller after
the rule is inserted to the maps, so the readers may keep skipping the
subtable until they see the updated partitions.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								 * caller's responsibility.
 								 * May only be called after all the readers have been terminated. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								void
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								classifier_destroy(struct classifier *cls)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    if (cls) {
-												cmap, classifier: Avoid unsafe aliasing in iterators.

CMAP_FOR_EACH and CLS_FOR_EACH and their variants tried to use void ** as
a "pointer to any kind of pointer".  That is a violation of the aliasing
rules in ISO C which technically yields undefined behavior.  With GCC 4.1,
it causes both warnings and actual misbehavior.  One option would be to add
-fno-strict-aliasing to the compiler flags, but that would only help with
GCC; who knows whether this can be worked around with other compilers.

Instead, this commit rewrites the iterators to avoid disallowed pointer
aliasing.

VMware-BZ: #1287651
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-21 21:00:04 -07:00
+								        struct cls_subtable *subtable;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        int i;
 								        for (i = 0; i < cls->n_tries; i++) {
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            trie_destroy(&cls->tries[i].root);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												cmap: Merge CMAP_FOR_EACH_SAFE into CMAP_FOR_EACH.

There isn't any significant downside to making cmap iteration "safe" all
the time, so this drops the _SAFE variant.

Similar changes to CMAP_CURSOR_FOR_EACH and CMAP_CURSOR_FOR_EACH_CONTINUE.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-29 09:02:23 -07:00
+								        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								            destroy_subtable(cls, subtable);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								        cmap_destroy(&cls->subtables_map);
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								        pvector_destroy(&cls->subtables);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
 								}
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								/* Set the fields for which prefix lookup should be performed. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								bool
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								classifier_set_prefix_fields(struct classifier *cls,
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                             const enum mf_field_id *trie_fields,
 								                             unsigned int n_fields)
 								{
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    const struct mf_field * new_fields[CLS_MAX_TRIES];
-												Remove assumption that there are 64 or fewer fields.

An upcoming commit will increase the number of fields beyond 64.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-26 12:15:26 -07:00
+								    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    int i, n_tries = 0;
 								    bool changed = false;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        const struct mf_field *field = mf_from_id(trie_fields[i]);
 								        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
 								            /* Incompatible field.  This is the only place where we
 								             * enforce these requirements, but the rest of the trie code
 								             * depends on the flow_be32ofs to be non-negative and the
 								             * field length to be a multiple of 32 bits. */
 								            continue;
 								        }
-												Remove assumption that there are 64 or fewer fields.

An upcoming commit will increase the number of fields beyond 64.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-26 12:15:26 -07:00
+								        if (bitmap_is_set(fields.bm, trie_fields[i])) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            /* Duplicate field, there is no need to build more than
 								             * one index for any one field. */
 								            continue;
 								        }
-												Remove assumption that there are 64 or fewer fields.

An upcoming commit will increase the number of fields beyond 64.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-26 12:15:26 -07:00
+								        bitmap_set1(fields.bm, trie_fields[i]);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        new_fields[n_tries] = NULL;
 								        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
 								            new_fields[n_tries] = field;
 								            changed = true;
 								        }
 								        n_tries++;
 								    }
 								    if (changed || n_tries < cls->n_tries) {
 								        struct cls_subtable *subtable;
 								        /* Trie configuration needs to change.  Disable trie lookups
 								         * for the tries that are changing and wait all the current readers
 								         * with the old configuration to be done. */
 								        changed = false;
 								        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
 								            for (i = 0; i < cls->n_tries; i++) {
 								                if ((i < n_tries && new_fields[i]) || i >= n_tries) {
 								                    if (subtable->trie_plen[i]) {
 								                        subtable->trie_plen[i] = 0;
 								                        changed = true;
 								                    }
 								                }
 								            }
 								        }
 								        /* Synchronize if any readers were using tries.  The readers may
 								         * temporarily function without the trie lookup based optimizations. */
 								        if (changed) {
 								            /* ovsrcu_synchronize() functions as a memory barrier, so it does
 								             * not matter that subtable->trie_plen is not atomic. */
 								            ovsrcu_synchronize();
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        /* Now set up the tries. */
 								        for (i = 0; i < n_tries; i++) {
 								            if (new_fields[i]) {
 								                trie_init(cls, i, new_fields[i]);
 								            }
 								        }
 								        /* Destroy the rest, if any. */
 								        for (; i < cls->n_tries; i++) {
 								            trie_init(cls, i, NULL);
 								        }
 								        cls->n_tries = n_tries;
 								        return true;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
 								    return false; /* No change. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								static void
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
 								    struct cls_trie *trie = &cls->tries[trie_idx];
 								    struct cls_subtable *subtable;
 								    if (trie_idx < cls->n_tries) {
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        trie_destroy(&trie->root);
 								    } else {
 								        ovsrcu_set_hidden(&trie->root, NULL);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
 								    trie->field = field;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    /* Add existing rules to the new trie. */
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        unsigned int plen;
 								        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
 								        if (plen) {
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								            struct cls_match *head;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                trie_insert(trie, head->cls_rule, plen);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            }
 								        }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        /* Initialize subtable's prefix length on this field.  This will
 								         * allow readers to use the trie. */
 								        atomic_thread_fence(memory_order_release);
 								        subtable->trie_plen[trie_idx] = plen;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
 								}
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								/* Returns true if 'cls' contains no classification rules, false otherwise.
 								 * Checking the cmap requires no locking. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								bool
 								classifier_is_empty(const struct classifier *cls)
 								{
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    return cmap_is_empty(&cls->subtables_map);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												classifier: Fix typo in comment.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-20 14:54:30 -07:00
+								/* Returns the number of rules in 'cls'. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								int
 								classifier_count(const struct classifier *cls)
 								{
-												lib/classifier: Lockless lookups.

Now that all the relevant classifier structures use RCU and internal
mutual exclusion for modifications, we can remove the fat-rwlock and
thus make the classifier lookups lockless.

As the readers are operating concurrently with the writers, a
concurrent reader may or may not see a new rule being added by a
writer, depending on how the concurrent events overlap with each
other.  Overall, this is no different from the former locked behavior,
but there the visibility of the new rule only depended on the timing
of the locking functions.

A new rule is first added to the segment indices, so the readers may
find the rule in the indices before the rule is visible in the
subtable's 'rules' map.  This may result in us losing the opportunity
to quit lookups earlier, resulting in sub-optimal wildcarding.  This
will be fixed by forthcoming revalidation always scheduled after flow
table changes.

Similar behavior may happen due to us removing the overlapping rule
(if any) from the indices only after the corresponding new rule has
been added.

The subtable's max priority is updated only after a rule is inserted
to the maps, so the concurrent readers may not see the rule, as the
updated priority ordered subtable list will only be visible after the
subtable's max priority is updated.

Similarly, the classifier's partitions are updated by the caller after
the rule is inserted to the maps, so the readers may keep skipping the
subtable until they see the updated partitions.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    /* n_rules is an int, so in the presence of concurrent writers this will
 								     * return either the old or a new value. */
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    return cls->n_rules;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
 								{
 								    /* Could optimize to use the same map if needed for fast path. */
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
+								    return (miniflow_get_ports(match->flow)
 								            & miniflow_get_ports(&match->mask->masks));
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								}
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								/* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
 								 * the caller must not modify or free it.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
 								 * If 'cls' already contains an identical rule (including wildcards, values of
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								 * fixed fields, and priority) that is visible in 'version', replaces the old
 								 * rule by 'rule' and returns the rule that was replaced.  The caller takes
 								 * ownership of the returned rule and is thus responsible for destroying it
 								 * with cls_rule_destroy(), after RCU grace period has passed (see
 								 * ovsrcu_postpone()).
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
 								 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 								 * inserting the new rule.  In this case, no rules are displaced by the new
 								 * rule, even rules that cannot have any effect because the new rule matches a
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								 * superset of their flows and has higher priority.
 								 */
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								const struct cls_rule *
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								classifier_replace(struct classifier *cls, const struct cls_rule *rule,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                   ovs_version_t version,
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								                   const struct cls_conjunction *conjs, size_t n_conjs)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    struct cls_match *new;
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    uint32_t ihash[CLS_MAX_INDICES];
 								    struct cls_match *head;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    unsigned int mask_offset;
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    size_t n_rules = 0;
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    uint32_t basis;
 								    uint32_t hash;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    unsigned int i;
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    /* 'new' is initially invisible to lookups. */
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    new = cls_match_alloc(rule, version, conjs, n_conjs);
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    ovsrcu_set(&CONST_CAST(struct cls_rule *, rule)->cls_match, new);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    subtable = find_subtable(cls, rule->match.mask);
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    if (!subtable) {
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        subtable = insert_subtable(cls, rule->match.mask);
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								    }
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    /* Compute hashes in segments. */
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    basis = 0;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    mask_offset = 0;
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    for (i = 0; i < subtable->n_indices; i++) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        ihash[i] = minimatch_hash_range(&rule->match, subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                        &mask_offset, &basis);
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    hash = minimatch_hash_range(&rule->match, subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                &mask_offset, &basis);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    head = find_equal(subtable, rule->match.flow, hash);
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								    if (!head) {
 								        /* Add rule to tries.
 								         *
 								         * Concurrent readers might miss seeing the rule until this update,
 								         * which might require being fixed up by revalidation later. */
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								        for (i = 0; i < cls->n_tries; i++) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            if (subtable->trie_plen[i]) {
 								                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
 								            }
 								        }
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								        /* Add rule to ports trie. */
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								        if (subtable->ports_mask_len) {
 								            /* We mask the value to be inserted to always have the wildcarded
 								             * bits in known (zero) state, so we can include them in comparison
 								             * and they will always match (== their original value does not
 								             * matter). */
 								            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
 								            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
 								                               subtable->ports_mask_len);
 								        }
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								        /* Add new node to segment indices. */
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								        for (i = 0; i < subtable->n_indices; i++) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								            ccmap_inc(&subtable->indices[i], ihash[i]);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								        }
 								        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
 								    } else {   /* Equal rules exist in the classifier already. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								        struct cls_match *prev, *iter;
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
 								        /* Scan the list for the insertion point that will keep the list in
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								         * order of decreasing priority.  Insert after rules marked invisible
 								         * in any version of the same priority. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								            if (rule->priority > iter->priority
 								                || (rule->priority == iter->priority
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                    && !cls_match_is_eventually_invisible(iter))) {
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                break;
 								            }
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
+								        }
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								        /* Replace 'iter' with 'new' or insert 'new' between 'prev' and
 								         * 'iter'. */
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								        if (iter) {
 								            struct cls_rule *old;
 								            if (rule->priority == iter->priority) {
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								                cls_match_replace(prev, iter, new);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
 								            } else {
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								                cls_match_insert(prev, iter, new);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                old = NULL;
 								            }
 								            /* Replace the existing head in data structures, if rule is the new
 								             * head. */
 								            if (iter == head) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								                cmap_replace(&subtable->rules, &head->cmap_node,
 								                             &new->cmap_node, hash);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								            }
 								            if (old) {
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								                struct cls_conjunction_set *conj_set;
 								                conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
 								                                                &iter->conj_set);
 								                if (conj_set) {
 								                    ovsrcu_postpone(free, conj_set);
 								                }
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								                ovsrcu_set(&old->cls_match, NULL); /* Marks old rule as removed
 								                                                    * from the classifier. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								                ovsrcu_postpone(cls_match_free_cb, iter);
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                /* No change in subtable's max priority or max count. */
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                /* Make 'new' visible to lookups in the appropriate version. */
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
 								                /* Make rule visible to iterators (immediately). */
-												classifier: Make insert and replace take a const rule, too.

classifier_remove() was recently changed to take a const struct
cls_rule *.  Make the corresponding change to classifier_replace() and
classifier_insert().  This simplifies existing calling sites in
ofproto.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								                rculist_replace(CONST_CAST(struct rculist *, &rule->node),
 								                                &old->node);
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								                /* Return displaced rule.  Caller is responsible for keeping it
 								                 * around until all threads quiesce. */
 								                return old;
 								            }
 								        } else {
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making it a
NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								            /* 'new' is new node after 'prev' */
 								            cls_match_insert(prev, iter, new);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								        }
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
-												classifier: Integrate insert_rule() into classifier_replace().

insert_rule() only had one caller and this makes the code easier to
understand.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-12 15:22:35 -08:00
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    /* Make 'new' visible to lookups in the appropriate version. */
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								    cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
 								    /* Make rule visible to iterators (immediately). */
-												classifier: Make insert and replace take a const rule, too.

classifier_remove() was recently changed to take a const struct
cls_rule *.  Make the corresponding change to classifier_replace() and
classifier_insert().  This simplifies existing calling sites in
ofproto.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    rculist_push_back(&subtable->rules_list,
 								                      CONST_CAST(struct rculist *, &rule->node));
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
 								     * 'max_count', if necessary.
 								     *
 								     * The rule was already inserted, but concurrent readers may not see the
 								     * rule yet as the subtables vector is not updated yet.  This will have to
 								     * be fixed by revalidation later. */
 								    if (n_rules == 1) {
 								        subtable->max_priority = rule->priority;
 								        subtable->max_count = 1;
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								        pvector_insert(&cls->subtables, subtable, rule->priority);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    } else if (rule->priority == subtable->max_priority) {
 								        ++subtable->max_count;
 								    } else if (rule->priority > subtable->max_priority) {
 								        subtable->max_priority = rule->priority;
 								        subtable->max_count = 1;
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								        pvector_change_priority(&cls->subtables, subtable, rule->priority);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    }
 								    /* Nothing was replaced. */
 								    cls->n_rules++;
-												classifier: Defer pvector publication.

This patch adds new functions classifier_defer() and
classifier_publish(), which control when the classifier modifications
are made available to lookups.  By default, all modifications are made
available to lookups immediately.  Modifications made after a
classifier_defer() call MAY be 'deferred' for later 'publication'.  A
call to classifier_publish() will both publish any deferred
modifications, and cause subsequent changes to be published
immediately.

Currently any deferring is limited to the visibility of the subtable
vector changes.  pvector now processes modifications mostly in a
working copy, which needs to be explicitly published with
pvector_publish().  pvector_publish() sorts the working copy and
removes gaps before publishing it.

This change helps avoid O(n**2) memory behavior in corner cases,
where a large number of rules with different masks are inserted or
deleted.

VMware-BZ: #1322017
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
 								    if (cls->publish) {
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								        pvector_publish(&cls->subtables);
-												classifier: Defer pvector publication.

This patch adds new functions, classifier_defer() and
classifier_publish(), which control when the classifier modifications
are made available to lookups.  By default, all modifications are made
available to lookups immediately.  Modifications made after a
classifier_defer() call MAY be 'deferred' for later 'publication'.  A
call to classifier_publish() will both publish any deferred
modifications, and cause subsequent changes to be published
immediately.

Currently any deferring is limited to the visibility of the subtable
vector changes.  pvector now processes modifications mostly in a
working copy, which needs to be explicitly published with
pvector_publish().  pvector_publish() sorts the working copy and
removes gaps before publishing it.

This change helps avoid O(n**2) memory behavior in corner cases,
where a large number of rules with different masks are inserted or
deleted.

VMware-BZ: #1322017
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    }
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    return NULL;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												ofproto: Make rule construction and destruction more symmetric.

Before, ->rule_construct() both created the rule and inserted into the
flow table, but ->rule_destruct() only destroyed the rule.  This makes
->rule_destruct() also remove the rule from the flow table.

											
										
										
											2011-05-11 14:06:48 -07:00
+								/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 								 * must not modify or free it.
 								 *
 								 * 'cls' must not contain an identical rule (including wildcards, values of
 								 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 								 * such a rule. */
 								void
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								classifier_insert(struct classifier *cls, const struct cls_rule *rule,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                  ovs_version_t version, const struct cls_conjunction conj[],
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								                  size_t n_conj)
-												ofproto: Make rule construction and destruction more symmetric.

Before, ->rule_construct() both created the rule and inserted into the
flow table, but ->rule_destruct() only destroyed the rule.  This makes
->rule_destruct() also remove the rule from the flow table.

											
										
										
											2011-05-11 14:06:48 -07:00
+								{
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    const struct cls_rule *displaced_rule
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								        = classifier_replace(cls, rule, version, conj, n_conj);
-												Replace most uses of assert by ovs_assert.

This is a straight search-and-replace, except that I also removed #include
<assert.h> from each file where there were no assert calls left.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-11-06 13:14:55 -08:00
+								    ovs_assert(!displaced_rule);
-												ofproto: Make rule construction and destruction more symmetric.

Before, ->rule_construct() both created the rule and inserted into the
flow table, but ->rule_destruct() only destroyed the rule.  This makes
->rule_destruct() also remove the rule from the flow table.

											
										
										
											2011-05-11 14:06:48 -07:00
+								}
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								/* If 'rule' is in 'cls', removes 'rule' from 'cls' and returns true.  It is
 								 * the caller's responsibility to destroy 'rule' with cls_rule_destroy(),
 								 * freeing the memory block in which 'rule' resides, etc., as necessary.
-												lib/classifier: Make classifier_remove() more robust.

classifier already provides lockless lookups, and protected
modifications.  When user wants to remove a flow, we currently require
the flow to exist in the classifier.  To be thread safe, this requires
the caller to introduce their own mutex, lock it before a lookup, and
then issue classifier_remove() while the lock is still held.

This patch relaxes the "existence requirement" of the rule in
classifier_remove(), allowing it to be called on a rule that may have
already been removed from the classifier.  This allows users to do a
classifier_lookup() and classifier_remove() without additional
synchronization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-10 15:38:57 -07:00
+								 *
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								 * If 'rule' is not in any classifier, returns false without making any
 								 * changes.
-												lib/classifier: Make classifier_remove() more robust.

classifier already provides lockless lookups, and protected
modifications.  When user wants to remove a flow, we currently require
the flow to exist in the classifier.  To be thread safe, this requires
the caller to introduce their own mutex, lock it before a lookup, and
then issue classifier_remove() while the lock is still held.

This patch relaxes the "existence requirement" of the rule in
classifier_remove(), allowing it to be called on a rule that may have
already been removed from the classifier.  This allows users to do a
classifier_lookup() and classifier_remove() without additional
synchronization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-10 15:38:57 -07:00
+								 *
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								 * 'rule' must not be in some classifier other than 'cls'.
-												lib/classifier: Make classifier_remove() more robust.

classifier already provides lockless lookups, and protected
modifications.  When user wants to remove a flow, we currently require
the flow to exist in the classifier.  To be thread safe, this requires
the caller to introduce their own mutex, lock it before a lookup, and
then issue classifier_remove() while the lock is still held.

This patch relaxes the "existence requirement" of the rule in
classifier_remove(), allowing it to be called on a rule that may have
already been removed from the classifier.  This allows users to do a
classifier_lookup() and classifier_remove() without additional
synchronization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-10 15:38:57 -07:00
+								 */
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								bool
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list a NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    struct cls_match *rule, *prev, *next, *head;
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    struct cls_conjunction_set *conj_set;
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    unsigned int mask_offset;
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    size_t n_rules;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    unsigned int i;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    rule = get_cls_match_protected(cls_rule);
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    if (!rule) {
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								        return false;
-												lib/classifier: Make classifier_remove() more robust.

classifier already provides lockless lookups, and protected
modifications.  When user wants to remove a flow, we currently require
the flow to exist in the classifier.  To be thread safe, this requires
the caller to introduce their own mutex, lock it before a lookup, and
then issue classifier_remove() while the lock is still held.

This patch relaxes the "existence requirement" of the rule in
classifier_remove(), allowing it to be called on a rule that may have
already been removed from the classifier.  This allows users to do a
classifier_lookup() and classifier_remove() without additional
synchronization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-10 15:38:57 -07:00
+								    }
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    /* Mark as removed. */
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    ovsrcu_set(&CONST_CAST(struct cls_rule *, cls_rule)->cls_match, NULL);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    /* Remove 'cls_rule' from the subtable's rules list. */
 								    rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows making
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    subtable = find_subtable(cls, cls_rule->match.mask);
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								    ovs_assert(subtable);
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    mask_offset = 0;
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    for (i = 0; i < subtable->n_indices; i++) {
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        ihash[i] = minimatch_hash_range(&cls_rule->match,
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                        subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                        &mask_offset, &basis);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    hash = minimatch_hash_range(&cls_rule->match, subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                &mask_offset, &basis);
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    head = find_equal(subtable, cls_rule->match.flow, hash);
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
to the pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    /* Check if the rule is not the head rule. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
to the pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    if (rule != head) {
 								        struct cls_match *iter;
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								        /* Not the head rule, but potentially one with the same priority. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
to the pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								        /* Remove from the list of equal rules. */
 								        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
 								            if (rule == iter) {
 								                break;
 								            }
 								        }
 								        ovs_assert(iter == rule);
 								        cls_match_remove(prev, rule);
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								        goto check_priority;
 								    }
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    /* 'rule' is the head rule.  Check if there is another rule to
 								     * replace 'rule' in the data structures. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back
to the pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    next = cls_match_next_protected(rule);
 								    if (next) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								        cmap_replace(&subtable->rules, &rule->cmap_node, &next->cmap_node,
 								                     hash);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								        goto check_priority;
 								    }
 								    /* 'rule' is last of the kind in the classifier, must remove from all the
 								     * data structures. */
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    if (subtable->ports_mask_len) {
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								        ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
 								        trie_remove_prefix(&subtable->ports_trie,
 								                           &masked_ports, subtable->ports_mask_len);
 								    }
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    for (i = 0; i < cls->n_tries; i++) {
 								        if (subtable->trie_plen[i]) {
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								            trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
 								    }
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    /* Remove rule node from indices. */
 								    for (i = 0; i < subtable->n_indices; i++) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								        ccmap_dec(&subtable->indices[i], ihash[i]);
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								    }
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    if (n_rules == 0) {
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								        destroy_subtable(cls, subtable);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    } else {
 								check_priority:
 								        if (subtable->max_priority == rule->priority
 								            && --subtable->max_count == 0) {
 								            /* Find the new 'max_priority' and 'max_count'. */
 								            int max_priority = INT_MIN;
 								            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
 								                if (head->priority > max_priority) {
 								                    max_priority = head->priority;
 								                    subtable->max_count = 1;
 								                } else if (head->priority == max_priority) {
 								                    ++subtable->max_count;
 								                }
-												lib/pvector: Non-intrusive RCU priority vector.

Factor out the priority vector code from the classifier.

Making the classifier use RCU instead of locking requires parallel
access to the priority vector, pointing to subtables in descending
priority order.  When a new subtable is added, a new copy of the
priority vector is allocated, while the current readers can keep on
using the old copy they started with.  Adding and removing subtables
is usually less frequent than adding and removing rules, so this
should not have a visible performance implication.  As an optimization
for the userspace datapath use, where all the subtables have the same
priority, new subtables can be added to the end of the vector without
reallocation and without disturbing readers.

cls_subtables_reset() is now removed, as it served its purpose in bug
hunting.  Checks on the new pvector are now incorporated into
tests/test-classifier.c.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-26 07:41:25 -07:00
+								            }
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								            subtable->max_priority = max_priority;
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								            pvector_change_priority(&cls->subtables, subtable, max_priority);
-												lib/pvector: Non-intrusive RCU priority vector.

Factor out the priority vector code from the classifier.

Making the classifier use RCU instead of locking requires parallel
access to the priority vector, pointing to subtables in descending
priority order.  When a new subtable is added, a new copy of the
priority vector is allocated, while the current readers can keep on
using the old copy they started with.  Adding and removing subtables
is usually less frequent than adding and removing rules, so this
should not have a visible performance implication.  As an optimization
for the userspace datapath use, where all the subtables have the same
priority, new subtables can be added to the end of the vector without
reallocation and without disturbing readers.

cls_subtables_reset() is now removed, as it served its purpose in bug
hunting.  Checks on the new pvector are now incorporated into
tests/test-classifier.c.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-26 07:41:25 -07:00
+								        }
-												Optimize classifier by maintaining the priority of the highest priority rule in each table.

Signed-off-by: Jarno Rajahalme <jarno.rajahalme@nsn.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-02-08 00:06:22 +02:00
+								    }
-												classifier: Defer pvector publication.

This patch adds two new functions, classifier_defer() and
classifier_publish(), which control when the classifier modifications
are made available to lookups.  By default, all modifications are made
available to lookups immediately.  Modifications made after a
classifier_defer() call MAY be 'deferred' for later 'publication'.  A
call to classifier_publish() will both publish any deferred
modifications, and cause subsequent changes to be published
immediately.

Currently any deferring is limited to the visibility of the subtable
vector changes.  pvector now processes modifications mostly in a
working copy, which needs to be explicitly published with
pvector_publish().  pvector_publish() sorts the working copy and
removes gaps before publishing it.

This change helps avoiding O(n**2) memory behavior in corner cases,
where large number of rules with different masks are inserted or
deleted.

VMware-BZ: #1322017
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
 								    if (cls->publish) {
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								        pvector_publish(&cls->subtables);
-												classifier: Defer pvector publication.

This patch adds two new functions, classifier_defer() and
classifier_publish(), which control when the classifier modifications
are made available to lookups.  By default, all modifications are made
available to lookups immediately.  Modifications made after a
classifier_defer() call MAY be 'deferred' for later 'publication'.  A
call to classifier_publish() will both publish any deferred
modifications, and cause subsequent changes to be published
immediately.

Currently any deferring is limited to the visibility of the subtable
vector changes.  pvector now processes modifications mostly in a
working copy, which needs to be explicitly published with
pvector_publish().  pvector_publish() sorts the working copy and
removes gaps before publishing it.

This change helps avoiding O(n**2) memory behavior in corner cases,
where large number of rules with different masks are inserted or
deleted.

VMware-BZ: #1322017
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    }
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back to the
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    /* free the rule. */
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								                                    &rule->conj_set);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    if (conj_set) {
 								        ovsrcu_postpone(free, conj_set);
 								    }
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list NULL terminated, singly linked RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back to the
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    ovsrcu_postpone(cls_match_free_cb, rule);
-												classifier: Do not insert duplicate rules in indices.

There is no point in adding duplicate information into prefix tries.

Also, since the lower-priority duplicate rules are not visible to
lookups, they do not need to be in staged lookup indices directly
either (the head rule is).

Finally, now that cmap operations return the number of elements in the
cmap, subtable's 'n_rules' member is not needed any more.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-14 14:47:03 -08:00
+								    cls->n_rules--;
-												lib/classifier: Make classifier_remove() more robust.

classifier already provides lockless lookups, and protected
modifications.  When user wants to remove a flow, we currently require
the flow to exist in the classifier.  To be thread safe, this requires
the caller to introduce their own mutex, lock it before a lookup, and
then issue classifier_remove() while the lock is still held.

This patch relaxes the "existence requirement" of the rule in
classifier_remove(), allowing it to be called on a rule that may have
already been removed from the classifier.  This allows users to do a
classifier_lookup() and classifier_remove() without additional
synchronization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-10 15:38:57 -07:00
-												classifier: Refactor interface for classifier_remove().

Until now, classifier_remove() returned either null or the classifier rule
passed to it, which is an unusual interface.  This commit changes it to
return true if it succeeds or false on failure.

In addition, most of classifier_remove()'s callers know ahead of time that
it must succeed, even though most of them didn't bother with an assertion,
so this commit adds a classifier_remove_assert() function as a helper.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-01-30 13:00:31 -08:00
+								    return true;
 								}
 								/* Calls classifier_remove() for 'cls_rule' in 'cls' and asserts, through
 								 * ovs_assert(), that the call returned true. */
 								void
 								classifier_remove_assert(struct classifier *cls,
 								                         const struct cls_rule *cls_rule)
 								{
 								    ovs_assert(classifier_remove(cls, cls_rule));
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								 * subtables which have a prefix match on the trie field, but whose prefix
 								 * length is not indicated in 'match_plens'.  For example, a subtable that
 								 * has a 8-bit trie field prefix match can be skipped if
 								 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 								 * must be unwildcarded to make datapath flow only match packets it should. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								struct trie_ctx {
 								    const struct cls_trie *trie;
 								    bool lookup_done;        /* Status of the lookup. */
 								    uint8_t be32ofs;         /* U32 offset of the field in question. */
 								    unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
-												classifier: Do not use mf_value.

mf_value has grown bigger than needed for storing the biggest
supported prefix (IPv6 address length).  Define a new type to be used
instead of mf_value.

This makes classifier lookups a bit faster.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-12 17:03:07 -07:00
+								    union trie_prefix match_plens;  /* Bitmask of prefix lengths with possible
 								                                     * matches. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								};
 								/* Prepares 'ctx' for a lookup in 'trie': records the trie and the 32-bit
 								 * word offset of its field, and marks the lookup as not yet done.  The
 								 * remaining members of 'ctx' are not written here. */
 								static void
 								trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
 								{
 								    ctx->lookup_done = false;
 								    ctx->trie = trie;
 								    ctx->be32ofs = trie->field->flow_be32ofs;
 								}
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								struct conjunctive_match {
 								    /* One record per conjunction ID encountered by
 								     * find_conjunctive_match(), tracking which clauses were seen. */
 								    struct hmap_node hmap_node; /* In the 'matches' hmap, hashed on 'id'. */
 								    uint32_t id;                /* Conjunction ID, copied from the
 								                                 * cls_conjunction that created the entry. */
 								    uint64_t clauses;           /* Bitmap: bit k is set once clause k has
 								                                 * been seen; the match is complete when
 								                                 * the value equals UINT64_MAX. */
 								};
 								/* Scans the bucket of 'matches' selected by 'hash' for the entry whose
 								 * conjunction ID equals 'id'.  Returns that entry, or NULL if the bucket
 								 * holds no such entry. */
 								static struct conjunctive_match *
 								find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
 								{
 								    struct conjunctive_match *candidate;
 								    HMAP_FOR_EACH_IN_BUCKET (candidate, hmap_node, hash, matches) {
 								        if (candidate->id != id) {
 								            continue;
 								        }
 								        return candidate;
 								    }
 								    return NULL;
 								}
 								/* Marks every clause listed in 'set' as seen and reports whether that
 								 * completes some conjunction.
 								 *
 								 * 'matches' holds one conjunctive_match per conjunction ID seen so far.
 								 * A new entry is taken from 'cm_stubs' while 'matches' holds fewer than
 								 * 'n_cm_stubs' entries, and from xmalloc() after that.
 								 *
 								 * Conjunctions with more than 'max_n_clauses' clauses are ignored.
 								 *
 								 * Returns true and stores the conjunction ID in '*idp' as soon as all the
 								 * clauses of one conjunction have been seen.  Otherwise returns false and
 								 * leaves '*idp' unmodified. */
 								static bool
 								find_conjunctive_match(const struct cls_conjunction_set *set,
 								                       unsigned int max_n_clauses, struct hmap *matches,
 								                       struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
 								                       uint32_t *idp)
 								{
 								    const struct cls_conjunction *c;
 								    /* If even the smallest conjunction in 'set' has too many clauses,
 								     * nothing in 'set' can qualify. */
 								    if (max_n_clauses < set->min_n_clauses) {
 								        return false;
 								    }
 								    for (c = set->conj; c < &set->conj[set->n]; c++) {
 								        struct conjunctive_match *cm;
 								        uint32_t hash;
 								        if (c->n_clauses > max_n_clauses) {
 								            continue;
 								        }
 								        hash = hash_int(c->id, 0);
 								        cm = find_conjunctive_match__(matches, c->id, hash);
 								        if (!cm) {
 								            size_t n = hmap_count(matches);
 								            cm = n < n_cm_stubs ? &cm_stubs[n] : xmalloc(sizeof *cm);
 								            hmap_insert(matches, &cm->hmap_node, hash);
 								            cm->id = c->id;
 								            /* Preset the bits at and above 'n_clauses' so that only the
 								             * real clauses remain to be filled in.  Shifting by 64 is
 								             * undefined, and masking the count down to 0 would preset
 								             * every bit, making a 64-clause conjunction look complete
 								             * after its first clause, so that width starts from 0. */
 								            cm->clauses = (c->n_clauses < 64
 								                           ? UINT64_MAX << c->n_clauses
 								                           : 0);
 								        }
 								        cm->clauses |= UINT64_C(1) << c->clause;
 								        if (cm->clauses == UINT64_MAX) {
 								            *idp = cm->id;
 								            return true;
 								        }
 								    }
 								    return false;
 								}
 								static void
 								free_conjunctive_matches(struct hmap *matches,
 								                         struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
 								{
 								    if (hmap_count(matches) > n_cm_stubs) {
 								        struct conjunctive_match *cm, *next;
 								        HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) {
 								            if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) {
 								                free(cm);
 								            }
 								        }
 								    }
 								    hmap_destroy(matches);
 								}
 								/* Like classifier_lookup(), except that support for conjunctive matches can be
 								 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 								 * externally because turning off conjunctive matches is only useful to avoid
 								 * recursion within this function itself.
-												classifier: Make classifier_lookup() 'flow' parameter non-const.

An upcoming commit will make classifier_lookup() sometimes modify its
'flow' argument temporarily during the lookup.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
---
v2: New patch.
v2.1: Rebase.
v3: Rebase.

											
										
										
											2014-10-30 14:12:45 -07:00
+								 *
 								 * 'flow' is non-const to allow for temporary modifications during the lookup.
 								 * Any changes are restored before returning. */
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								static const struct cls_rule *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								classifier_lookup__(const struct classifier *cls, ovs_version_t version,
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                    struct flow *flow, struct flow_wildcards *wc,
 								                    bool allow_conjunctive_matches)
-												classifier: Merge classifier_lookup_wild(), classifier_lookup_exact().

Merge these functions into classifier_lookup() and update its interface.

The new version of the classifier soon to be implemented naturally merges
these functions, so this commit updates the interface early.

											
										
										
											2010-10-14 10:13:51 -07:00
+								{
-												lib/pvector: Non-intrusive RCU priority vector.

Factor out the priority vector code from the classifier.

Making the classifier use RCU instead of locking requires parallel
access to the priority vector, pointing to subtables in descending
priority order.  When a new subtable is added, a new copy of the
priority vector is allocated, while the current readers can keep on
using the old copy they started with.  Adding and removing subtables
is usually less frequent than adding and removing rules, so this
should not have a visible performance implication.  As an optimization
for the userspace datapath use, where all the subtables have the same
priority, new subtables can be added to the end of the vector without
reallocation and without disturbing readers.

cls_subtables_reset() is now removed, as it served its purpose in bug
hunting.  Checks on the new pvector are now incorporated into
tests/test-classifier.c.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-26 07:41:25 -07:00
+								    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    const struct cls_match *match;
 								    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
 								    const struct cls_match *hard = NULL;
 								    int hard_pri = INT_MIN;     /* hard ? hard->priority : INT_MIN. */
 								    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
 								     * these are (components of) conjunctive flows, we can only know whether
 								     * the full conjunctive flow matches after seeing multiple of them.  Thus,
 								     * we refer to these as "soft matches". */
 								    struct cls_conjunction_set *soft_stub[64];
 								    struct cls_conjunction_set **soft = soft_stub;
 								    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
 								    int soft_pri = INT_MIN;    /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
waiting them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
 								     * when table configuration changes, which happens typically only on
 								     * startup. */
 								    atomic_thread_fence(memory_order_acquire);
-												classifier: Correct comment in classifier_lookup().

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>

											
										
										
											2014-10-24 16:17:08 -07:00
+								    /* Initialize trie contexts for find_match_wc(). */
-												lib/pvector: Non-intrusive RCU priority vector.

Factor out the priority vector code from the classifier.

Making the classifier use RCU instead of locking requires parallel
access to the priority vector, pointing to subtables in descending
priority order.  When a new subtable is added, a new copy of the
priority vector is allocated, while the current readers can keep on
using the old copy they started with.  Adding and removing subtables
is usually less frequent than adding and removing rules, so this
should not have a visible performance implication.  As an optimization
for the userspace datapath use, where all the subtables have the same
priority, new subtables can be added to the end of the vector without
reallocation and without disturbing readers.

cls_subtables_reset() is now removed, as it served its purpose in bug
hunting.  Checks on the new pvector are now incorporated into
tests/test-classifier.c.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-26 07:41:25 -07:00
+								    for (int i = 0; i < cls->n_tries; i++) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
 								    }
-												classifier: Use array for subtables instead of a list.

Using a linear array allows more efficient memory access for lookups.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    /* Main loop. */
 								    struct cls_subtable *subtable;
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri + 1, 2, sizeof *subtable,
 								                               &cls->subtables) {
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								        struct cls_conjunction_set *conj_set;
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								        /* Skip subtables with no match, or where the match is lower-priority
 								         * than some certain match we've already found. */
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								        match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
 								                              wc);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								        if (!match || match->priority <= hard_pri) {
 								            continue;
 								        }
 								        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
 								        if (!conj_set) {
 								            /* 'match' isn't part of a conjunctive match.  It's the best
 								             * certain match we've got so far, since we know that it's
 								             * higher-priority than hard_pri.
 								             *
 								             * (There might be a higher-priority conjunctive match.  We can't
 								             * tell yet.) */
 								            hard = match;
 								            hard_pri = hard->priority;
 								        } else if (allow_conjunctive_matches) {
 								            /* 'match' is part of a conjunctive match.  Add it to the list. */
 								            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
 								                struct cls_conjunction_set **old_soft = soft;
 								                allocated_soft *= 2;
 								                soft = xmalloc(allocated_soft * sizeof *soft);
 								                memcpy(soft, old_soft, n_soft * sizeof *soft);
 								                if (old_soft != soft_stub) {
 								                    free(old_soft);
 								                }
 								            }
 								            soft[n_soft++] = conj_set;
 								            /* Keep track of the highest-priority soft match. */
 								            if (soft_pri < match->priority) {
 								                soft_pri = match->priority;
 								            }
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								        }
-												classifier: Merge classifier_lookup_wild(), classifier_lookup_exact().

Merge these functions into classifier_lookup() and update its interface.

The new version of the classifier soon to be implemented naturally merges
these functions, so this commit updates the interface early.

											
										
										
											2010-10-14 10:13:51 -07:00
+								    }
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								    /* In the common case, at this point we have no soft matches and we can
 								     * return immediately.  (We do the same thing if we have potential soft
 								     * matches but none of them are higher-priority than our hard match.) */
 								    if (hard_pri >= soft_pri) {
 								        if (soft != soft_stub) {
 								            free(soft);
 								        }
 								        return hard ? hard->cls_rule : NULL;
 								    }
 								    /* At this point, we have some soft matches.  We might also have a hard
 								     * match; if so, its priority is lower than the highest-priority soft
 								     * match. */
 								    /* Soft match loop.
 								     *
 								     * Check whether soft matches are real matches. */
 								    for (;;) {
 								        /* Delete soft matches that are null.  This only happens in second and
 								         * subsequent iterations of the soft match loop, when we drop back from
 								         * a high-priority soft match to a lower-priority one.
 								         *
 								         * Also, delete soft matches whose priority is less than or equal to
 								         * the hard match's priority.  In the first iteration of the soft
 								         * match, these can be in 'soft' because the earlier main loop found
 								         * the soft match before the hard match.  In second and later iteration
 								         * of the soft match loop, these can be in 'soft' because we dropped
 								         * back from a high-priority soft match to a lower-priority soft match.
 								         *
 								         * It is tempting to delete soft matches that cannot be satisfied
 								         * because there are fewer soft matches than required to satisfy any of
 								         * their conjunctions, but we cannot do that because there might be
 								         * lower priority soft or hard matches with otherwise identical
 								         * matches.  (We could special case those here, but there's no
 								         * need--we'll do so at the bottom of the soft match loop anyway and
 								         * this duplicates less code.)
 								         *
 								         * It's also tempting to break out of the soft match loop if 'n_soft ==
 								         * 1' but that would also miss lower-priority hard matches.  We could
 								         * special case that also but again there's no need. */
 								        for (int i = 0; i < n_soft; ) {
 								            if (!soft[i] || soft[i]->priority <= hard_pri) {
 								                soft[i] = soft[--n_soft];
 								            } else {
 								                i++;
 								            }
 								        }
 								        if (!n_soft) {
 								            break;
 								        }
 								        /* Find the highest priority among the soft matches.  (We know this
 								         * must be higher than the hard match's priority; otherwise we would
 								         * have deleted all of the soft matches in the previous loop.)  Count
 								         * the number of soft matches that have that priority. */
 								        soft_pri = INT_MIN;
 								        int n_soft_pri = 0;
 								        for (int i = 0; i < n_soft; i++) {
 								            if (soft[i]->priority > soft_pri) {
 								                soft_pri = soft[i]->priority;
 								                n_soft_pri = 1;
 								            } else if (soft[i]->priority == soft_pri) {
 								                n_soft_pri++;
 								            }
 								        }
 								        ovs_assert(soft_pri > hard_pri);
 								        /* Look for a real match among the highest-priority soft matches.
 								         *
 								         * It's unusual to have many conjunctive matches, so we use stubs to
 								         * avoid calling malloc() in the common case.  An hmap has a built-in
 								         * stub for up to 2 hmap_nodes; possibly, we would benefit a variant
 								         * with a bigger stub. */
 								        struct conjunctive_match cm_stubs[16];
 								        struct hmap matches;
 								        hmap_init(&matches);
 								        for (int i = 0; i < n_soft; i++) {
 								            uint32_t id;
 								            if (soft[i]->priority == soft_pri
 								                && find_conjunctive_match(soft[i], n_soft_pri, &matches,
 								                                          cm_stubs, ARRAY_SIZE(cm_stubs),
 								                                          &id)) {
 								                uint32_t saved_conj_id = flow->conj_id;
 								                const struct cls_rule *rule;
 								                flow->conj_id = id;
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                rule = classifier_lookup__(cls, version, flow, wc, false);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								                flow->conj_id = saved_conj_id;
 								                if (rule) {
 								                    free_conjunctive_matches(&matches,
 								                                             cm_stubs, ARRAY_SIZE(cm_stubs));
 								                    if (soft != soft_stub) {
 								                        free(soft);
 								                    }
 								                    return rule;
 								                }
 								            }
 								        }
 								        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));
 								        /* There's no real match among the highest-priority soft matches.
 								         * However, if any of those soft matches has a lower-priority but
 								         * otherwise identical flow match, then we need to consider those for
 								         * soft or hard matches.
 								         *
 								         * The next iteration of the soft match loop will delete any null
 								         * pointers we put into 'soft' (and some others too). */
 								        for (int i = 0; i < n_soft; i++) {
 								            if (soft[i]->priority != soft_pri) {
 								                continue;
 								            }
 								            /* Find next-lower-priority flow with identical flow match. */
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								            match = next_visible_rule_in_list(soft[i]->match, version);
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								            if (match) {
 								                soft[i] = ovsrcu_get(struct cls_conjunction_set *,
 								                                     &match->conj_set);
 								                if (!soft[i]) {
 								                    /* The flow is a hard match; don't treat as a soft
 								                     * match. */
 								                    if (match->priority > hard_pri) {
 								                        hard = match;
 								                        hard_pri = hard->priority;
 								                    }
 								                }
 								            } else {
 								                /* No such lower-priority flow (probably the common case). */
 								                soft[i] = NULL;
 								            }
 								        }
 								    }
 								    if (soft != soft_stub) {
 								        free(soft);
 								    }
 								    return hard ? hard->cls_rule : NULL;
 								}
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
 								 * that is visible in 'version'.  Returns a null pointer if no rules in 'cls'
 								 * match 'flow'.  If multiple rules of equal priority match 'flow', returns one
 								 * arbitrarily.
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								 *
 								 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 								 * set of bits that were significant in the lookup.  At some point
 								 * earlier, 'wc' should have been initialized (e.g., by
 								 * flow_wildcards_init_catchall()).
 								 *
 								 * 'flow' is non-const to allow for temporary modifications during the lookup.
 								 * Any changes are restored before returning. */
 								const struct cls_rule *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								classifier_lookup(const struct classifier *cls, ovs_version_t version,
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                  struct flow *flow, struct flow_wildcards *wc)
-												classifier: Add support for conjunctive matches.

A "conjunctive match" allows higher-level matches in the flow table, such
as set membership matches, without causing a cross-product explosion for
multidimensional matches.  Please refer to the documentation that this
commit adds to ovs-ofctl(8) for a better explanation, including an example.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-01-11 13:25:24 -08:00
+								{
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    return classifier_lookup__(cls, version, flow, wc, true);
-												classifier: Merge classifier_lookup_wild(), classifier_lookup_exact().

Merge these functions into classifier_lookup() and update its interface.

The new version of the classifier soon to be implemented naturally merges
these functions, so this commit updates the interface early.

											
										
										
											2010-10-14 10:13:51 -07:00
+								}
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								/* Finds and returns a rule in 'cls' with exactly the same priority and
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								 * matching criteria as 'target', and that is visible in 'version'.
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the use of 64-bit atomics.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 * Only one such rule may ever exist.  Returns a null pointer if 'cls' doesn't
 								 * contain an exact match. */
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								const struct cls_rule *
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								classifier_find_rule_exactly(const struct classifier *cls,
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								                             const struct cls_rule *target,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                             ovs_version_t version)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								    const struct cls_match *head, *rule;
 								    const struct cls_subtable *subtable;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    subtable = find_subtable(cls, target->match.mask);
-												Revert "lib/classifier: Minimize critical section."

This reverts commit d2064437e2bf91859a0a50fba30dcabba668a811, which
fails clang thread safety analysis.

A more complete patch will be introduced later.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-10-15 10:56:32 -07:00
+								    if (!subtable) {
-												classifier: Make classifier_find_rule_exactly() lockless.

struct cls_match 'list' member was recently changed to an rculist.
This allows classifier_find_rule_exactly() to be made lockless.

Since subtable's 'max_priority' member would still require a lock, we
no longer check it before calling find_equal().  This adds a hash
table lookup in cases where the subtable may already be known to not
contain any rule of the target priority.
classifier_find_rule_exactly() is never called on the fastpath, so
this should not be significant.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-03 09:56:54 -08:00
+								        return NULL;
-												Optimize classifier by maintaining the priority of the highest priority rule in each table.

Signed-off-by: Jarno Rajahalme <jarno.rajahalme@nsn.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-02-08 00:06:22 +02:00
+								    }
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    head = find_equal(subtable, target->match.flow,
 								                      miniflow_hash_in_minimask(target->match.flow,
 								                                                target->match.mask, 0));
-												classifier: Make classifier_find_rule_exactly() lockless.

struct cls_match 'list' member was recently changed to an rculist.
This allows classifier_find_rule_exactly() to be made lockless.

Since subtable's 'max_priority' member would still require a lock, we
no longer check it before calling find_equal().  This adds a hash
table lookup in cases where the subtable may already be known to not
contain any rule of the target priority.
classifier_find_rule_exactly() is never called on the fastpath, so
this should not be significant.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-03 09:56:54 -08:00
+								    if (!head) {
 								        return NULL;
 								    }
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list a NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								    CLS_MATCH_FOR_EACH (rule, head) {
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								        if (rule->priority < target->priority) {
 								            break; /* Not found. */
 								        }
 								        if (rule->priority == target->priority
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								            && cls_match_visible_in_version(rule, version)) {
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								            return rule->cls_rule;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
 								    }
 								    return NULL;
 								}
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables support for atomic OpenFlow bundles without a
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 * same matching criteria as 'target', and that is visible in 'version'.
 								 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
 								 * 'version'. */
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								const struct cls_rule *
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								classifier_find_match_exactly(const struct classifier *cls,
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables support for atomic OpenFlow bundles without a
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                              const struct match *target, int priority,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                              ovs_version_t version)
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								{
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								    const struct cls_rule *retval;
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								    struct cls_rule cr;
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cls_rule_init(&cr, target, priority);
 								    retval = classifier_find_rule_exactly(cls, &cr, version);
-												classifier: Prepare for "struct cls_rule" needing to be destroyed.

Until now, "struct cls_rule" didn't own any data outside its own memory
block.  An upcoming commit will make "struct cls_rule" sometimes own blocks
of memory, so it needs "destroy" and to a lesser extent "clone" functions.
This commit adds these in advance, even though they are mostly no-ops, to
make it possible to separately review the memory management.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-20 11:29:43 -07:00
+								    cls_rule_destroy(&cr);
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
 								    return retval;
 								}
-												flow, match, classifier: Add new functions for miniflow and minimatch.

The miniflow and minimatch APIs lack several of the features of the flow
and match APIs.  This commit adds a few of the missing functions.

These functions will be used for the first time in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 22:00:34 -07:00
+								/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
 								 * same matching criteria as 'target', and that is visible in 'version'.
 								 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
 								 * 'version'. */
 								const struct cls_rule *
 								classifier_find_minimatch_exactly(const struct classifier *cls,
 								                              const struct minimatch *target, int priority,
 								                              ovs_version_t version)
 								{
 								    const struct cls_rule *retval;
 								    struct cls_rule cr;
 								    /* Wrap 'target' and 'priority' in a temporary rule, which is the form
 								     * in which classifier_find_rule_exactly() takes its lookup key. */
 								    cls_rule_init_from_minimatch(&cr, target, priority);
 								    retval = classifier_find_rule_exactly(cls, &cr, version);
 								    /* 'cr' was needed only for the lookup above. */
 								    cls_rule_destroy(&cr);
 								    return retval;
 								}
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								/* Checks if 'target' would overlap any other rule in 'cls' in 'version'.  Two
 								 * rules are considered to overlap if both rules have the same priority and a
 								 * packet could match both, and if both rules are visible in the same version.
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows
classifier_rule_overlaps() to be made lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								 *
 								 * A trivial example of overlapping rules is two rules matching disjoint sets
 								 * of fields. E.g., if one rule matches only on port number, while another only
 								 * on dl_type, any packet from that specific port and with that specific
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables support for atomic OpenFlow bundles without a
significant performance penalty on 64-bit systems. There is a
performance decrease on 32-bit systems due to the 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 * dl_type could match both, if the rules also have the same priority. */
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
+								bool
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								classifier_rule_overlaps(const struct classifier *cls,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                         const struct cls_rule *target, ovs_version_t version)
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
+								{
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    /* Iterate subtables in the descending max priority order. */
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped into the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority, 2,
 								                               sizeof(struct cls_subtable), &cls->subtables) {
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        struct {
 								            struct minimask mask;
 								            uint64_t storage[FLOW_U64S];
 								        } m;
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows
classifier_rule_overlaps() to be made lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								        const struct cls_rule *rule;
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        minimask_combine(&m.mask, target->match.mask, &subtable->mask,
 								                         m.storage);
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also allows
classifier_rule_overlaps() to be made lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								        RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
 								            if (rule->priority == target->priority
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								                && miniflow_equal_in_minimask(target->match.flow,
 								                                              rule->match.flow, &m.mask)
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to become
NULL while being accessed by an RCU reader, but we failed to check for
this condition.  This may have resulted in NULL pointer dereference
and ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								                && cls_rule_visible_in_version(rule, version)) {
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								                return true;
-												ofproto: Check overlap, emerg flow cache, and error code sync (OpenFlow 0.9)

This commit adds (some) support for a couple new OpenFlow 0.9 features:

    - The OFPFF_CHECK_OVERLAP flag in Flow Mod messages allows the
      controller to prevent flows that would conflict at the same
      priority.

    - An emergency flow cache that contains a small flow table that is
      used if the switch loses connectivity with the controller.  I
      believe the design has fundamental flaws and looks likely to be
      retired.  If a controller attempts to add a flow to the emergency
      flow cache, OVS always responds that the tables are full.

The OpenFlow 0.9 error codes are also sync'd in the commit.

NOTE: OVS at this point is not wire-compatible with OpenFlow 0.9 until the
final commit in this OpenFlow 0.9 set.

											
										
										
											2009-11-12 15:40:33 -08:00
+								            }
 								        }
 								    }
 								    return false;
 								}
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
 								/* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
 								 * specific than 'criteria'.  That is, 'rule' matches 'criteria' and this
 								 * function returns true if, for every field:
 								 *
 								 *   - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
 								 *     field, or
 								 *
 								 *   - 'criteria' wildcards the field,
 								 *
 								 * Conversely, 'rule' does not match 'criteria' and this function returns false
 								 * if, for at least one field:
 								 *
 								 *   - 'criteria' and 'rule' specify different values for the field, or
 								 *
 								 *   - 'criteria' specifies a value for the field but 'rule' wildcards it.
 								 *
 								 * Equivalently, the truth table for whether a field matches is:
 								 *
 								 *                                     rule
 								 *
 								 *                   c         wildcard    exact
 								 *                   r        +---------+---------+
 								 *                   i   wild |   yes   |   yes   |
 								 *                   t   card |         |         |
 								 *                   e        +---------+---------+
 								 *                   r  exact |    no   |if values|
 								 *                   i        |         |are equal|
 								 *                   a        +---------+---------+
 								 *
 								 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
 								 * commands and by OpenFlow 1.0 aggregate and flow stats.
 								 *
-												classifier: Break cls_rule 'flow' and 'wc' members into new "struct match".

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-07 15:28:18 -07:00
+								 * Ignores rule->priority. */
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
+								bool
 								cls_rule_is_loose_match(const struct cls_rule *rule,
-												Introduce sparse flows and masks, to reduce memory usage and improve speed.

A cls_rule is 324 bytes on i386 now.  The cost of a flow table lookup is
currently proportional to this size, which is going to continue to grow.
However, the required cost of a flow table lookup, with the classifier that
we currently use, is only proportional to the number of bits that a rule
actually matches.  This commit implements that optimization by replacing
the match inside "struct cls_rule" by a sparse representation.

This reduces struct cls_rule to 100 bytes on i386.

There is still some headroom for further optimization following this
commit:

    - I suspect that adding an 'n' member to struct miniflow would make
      miniflow operations faster, since popcount() has some cost.

    - It's probably possible to replace the "struct minimatch" in cls_rule
      by just a "struct miniflow", since the cls_rule's cls_table has a
      copy of the minimask.

    - Some of the miniflow operations aren't well-optimized.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-09-04 12:43:53 -07:00
+								                        const struct minimatch *criteria)
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
+								{
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    return (!minimask_has_extra(rule->match.mask, criteria->mask)
 								            && miniflow_equal_in_minimask(rule->match.flow, criteria->flow,
 								                                          criteria->mask));
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
+								}
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								/* Iteration. */
 								static bool
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								rule_matches(const struct cls_rule *rule, const struct cls_rule *target,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								             ovs_version_t version)
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								{
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    /* Rule may only match a target if it is visible in target's version. */
-												classifier: Fix race condition leading to NULL dereference.

Addition of table versioning exposed struct cls_rule member
'cls_match' to RCU readers and made it possible for 'cls_match' to
become NULL while being accessed by an RCU reader, but we failed to
check for this condition.  This may have resulted in a NULL pointer
dereference and an ovs-vswitchd crash.

Fix this by making the 'cls_match' member an RCU pointer and checking
the value whenever it is potentially read by an RCU reader.  In these
instances we use ovsrcu_get(), whereas functions accessible only by
the exclusive writers use ovsrcu_get_protected() and do not need to
check the result.

VMware-BZ: 1643642
Fixes: 2b7b1427 ("classifier: Support table versioning")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-04-17 08:51:21 -07:00
+								    return cls_rule_visible_in_version(rule, version)
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        && (!target || miniflow_equal_in_minimask(rule->match.flow,
 								                                                  target->match.flow,
 								                                                  target->match.mask));
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								}
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								static const struct cls_rule *
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								search_subtable(const struct cls_subtable *subtable,
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								                struct cls_cursor *cursor)
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								{
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    if (!cursor->target
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								        || !minimask_has_extra(&subtable->mask, cursor->target->match.mask)) {
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								        const struct cls_rule *rule;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								        RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								            if (rule_matches(rule, cursor->target, cursor->version)) {
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								                return rule;
 								            }
 								        }
 								    }
 								    return NULL;
 								}
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								/* Initializes 'cursor' for iterating through rules in 'cls', and returns the
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								 * cursor.
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								 *
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								 *     - If 'target' is null, or if the 'target' is a catchall target, the
 								 *       cursor will visit every rule in 'cls' that is visible in 'version'.
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								 *
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
+								 *     - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								 *       such that cls_rule_is_loose_match(rule, target) returns true and that
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								 *       the rule is visible in 'version'.
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								 *
-												classifier: New function cls_rule_is_loose_match().

This function will be useful in an upcoming commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-07-12 10:13:10 -07:00
+								 * Ignores target->priority. */
-												classifier: Support duplicate rules.

OpenFlow 1.4 bundles are easier to implement when it is possible to
mark a rule as 'to_be_removed' and then insert a new, identical rule
with the same priority.

All but one out of the identical rules must be marked as
'to_be_removed', and the one rule that is not 'to_be_removed' must
have been inserted last.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								struct cls_cursor
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								cls_cursor_start(const struct classifier *cls, const struct cls_rule *target,
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								                 ovs_version_t version)
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								{
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    struct cls_cursor cursor;
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    cursor.cls = cls;
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								    cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL;
 								    cursor.version = version;
-												cmap, classifier: Avoid unsafe aliasing in iterators.

CMAP_FOR_EACH and CLS_FOR_EACH and their variants tried to use void ** as
a "pointer to any kind of pointer".  That is a violation of the aliasing
rules in ISO C which technically yields undefined behavior.  With GCC 4.1,
it causes both warnings and actual misbehavior.  One option would be to add
-fno-strict-aliasing to the compiler flags, but that would only help with
GCC; who knows whether this can be worked around with other compilers.

Instead, this commit rewrites the iterators to avoid disallowed pointer
aliasing.

VMware-BZ: #1287651
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-21 21:00:04 -07:00
+								    cursor.rule = NULL;
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
 								    /* Find first rule. */
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables,
 								                             &cursor.cls->subtables) {
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								        const struct cls_rule *rule = search_subtable(subtable, &cursor);
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								        if (rule) {
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								            cursor.subtable = subtable;
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								            cursor.rule = rule;
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								            break;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								        }
 								    }
-												lib/classifier: Simplify iteration with C99 declaration.

Hide the cursor from the classifier iteration users and move locking to
the iterators.  This will make following RCU changes simpler, as the call
sites of the iterators need not be changed at that point.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    return cursor;
 								}
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								static const struct cls_rule *
-												classifier: Refactor cls_cursor_advance() to make it easier to follow.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-21 21:00:34 -07:00
+								cls_cursor_next(struct cls_cursor *cursor)
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								{
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    const struct cls_rule *rule;
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    const struct cls_subtable *subtable;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    rule = cursor->rule;
 								    subtable = cursor->subtable;
 								    RCULIST_FOR_EACH_CONTINUE (rule, node, &subtable->rules_list) {
-												classifier: Make versioning more explicit.

Now that struct cls_match has 'add_version' the 'version' in cls_match
was largely redundant.  Remove 'version' from struct cls_rule, and add
it to function prototypes that need it.  This makes versioning more
explicit (or less indirect) in the API.

Suggested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-07-06 11:45:54 -07:00
+								        if (rule_matches(rule, cursor->target, cursor->version)) {
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								            return rule;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								        }
 								    }
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    PVECTOR_CURSOR_FOR_EACH_CONTINUE (subtable, &cursor->subtables) {
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								        rule = search_subtable(subtable, cursor);
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								        if (rule) {
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								            cursor->subtable = subtable;
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								            return rule;
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								        }
 								    }
-												classifier: Refactor cls_cursor_advance() to make it easier to follow.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-21 21:00:34 -07:00
+								    return NULL;
 								}
 								/* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
 								 * or to null if all matching rules have been visited. */
 								void
 								cls_cursor_advance(struct cls_cursor *cursor)
 								{
 								    cursor->rule = cls_cursor_next(cursor);
-												classifier: Add functions and macros for iteration, and use them in ofproto.

This is much more convenient in practice than being forced to use a
callback function.

											
										
										
											2010-10-28 16:18:20 -07:00
+								}
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								static struct cls_subtable *
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								find_subtable(const struct classifier *cls, const struct minimask *mask)
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								{
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0),
-												lib/classifier: Rename 'cls_subtable_cache' as 'cls_subtables'.

'cache' gives an inexact connotation, as the list is always expected
to be in order and contain pointers to all the subtables.

The struct cls_subtables fields are also renamed to be more readable.

struct cls_classifier field 'subtables' is renamed to 'subtables_map' and
'subtables_priority' is renamed to 'subtables'.

There are no functional changes in this patch.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-19 10:41:03 -07:00
+								                             &cls->subtables_map) {
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								        if (minimask_equal(mask, &subtable->mask)) {
 								            return subtable;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
 								    }
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								    return NULL;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								/* Initializes 'map' with a subset of 'miniflow''s maps that includes only the
 								 * portions with u64-offset 'i' such that 'start' <= i < 'end'.  Does not copy
 								 * any data from 'miniflow' to 'map'. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								static struct flowmap
 								miniflow_get_map_in_range(const struct miniflow *miniflow, uint8_t start,
 								                          uint8_t end)
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								{
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    struct flowmap map;
 								    size_t ofs = 0;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    map = miniflow->map;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    /* Clear the bits before 'start'. */
 								    while (start >= MAP_T_BITS) {
 								        start -= MAP_T_BITS;
 								        ofs += MAP_T_BITS;
 								        map.bits[start / MAP_T_BITS] = 0;
 								    }
 								    if (start > 0) {
 								        flowmap_clear(&map, ofs, start);
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    /* Clear the bits starting at 'end'. */
 								    if (end < FLOW_U64S) {
 								        /* flowmap_clear() can handle at most MAP_T_BITS at a time. */
 								        ovs_assert(FLOW_U64S - end <= MAP_T_BITS);
 								        flowmap_clear(&map, end, FLOW_U64S - end);
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return map;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								}
-												lib/classifier: Use internal mutex.

Add an internal mutex to struct cls_classifier, and reorganize
classifier internal structures according to the user of each field,
marking the fields that need to be protected by the mutex.  This makes
locking requirements easier to track, and may make lookup more memory
efficient.

After this patch there is some double locking, as callers are taking
the fat-rwlock, and we take the mutex internally.  A following patch
will remove the classifier fat-rwlock, removing the (double) locking
overhead.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								/* The new subtable will be visible to the readers only after this. */
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								static struct cls_subtable *
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								insert_subtable(struct classifier *cls, const struct minimask *mask)
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								{
-												classifier: Speed up lookup when metadata partitions the flow table.

We have a controller that puts many rules with different metadata values
into the flow table, where metadata is used (by "resubmit"s) to distinguish
stages in a pipeline.  Thus, any given flow only needs to be hashed into
classifier "cls_table"s that contain a match for the flow's metadata value.
This commit optimizes the classifier lookup by (probabilistically) skipping
the "cls_table"s that can't possibly match.

(The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
which is a 64-bit field similar in purpose to the "registers" defined by
Open vSwitch.)

Previous versions of this patch, with earlier versions of the controller in
question, improved flow setup performance by about 19%.

Bug #14282.
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-25 15:07:21 -07:00
+								    uint32_t hash = minimask_hash(mask, 0);
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    struct cls_subtable *subtable;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    int i, index = 0;
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    struct flowmap stage_map;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    uint8_t prev;
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    size_t count = miniflow_n_values(&mask->masks);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												flow: Always inline miniflows.

Now that performance critical code already inlines miniflows and
minimasks, we can simplify struct miniflow by always dynamically
allocating miniflows and minimasks to the correct size.  This changes
the struct minimatch to always contain pointers to its miniflow and
minimask.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    subtable = xzalloc(sizeof *subtable + MINIFLOW_VALUES_SIZE(count));
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    cmap_init(&subtable->rules);
-												flow: Eliminate miniflow_clone() and minimask_clone().

miniflow_clone() and minimask_clone() are no longer used, remove them
from the API.

Now that miniflow data is always inlined, it makes sense to rename
miniflow_clone_inline() miniflow_clone().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 13:17:01 -07:00
+								    miniflow_clone(CONST_CAST(struct miniflow *, &subtable->mask.masks),
 								                   &mask->masks, count);
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
 								    /* Init indices for segmented lookup, if any. */
 								    prev = 0;
 								    for (i = 0; i < cls->n_flow_segments; i++) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        stage_map = miniflow_get_map_in_range(&mask->masks, prev,
 								                                              cls->flow_segments[i]);
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								        /* Add an index if it adds mask bits. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        if (!flowmap_is_empty(stage_map)) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								            ccmap_init(&subtable->indices[index]);
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								            *CONST_CAST(struct flowmap *, &subtable->index_maps[index])
 								                = stage_map;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								            index++;
 								        }
 								        prev = cls->flow_segments[i];
 								    }
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    /* Map for the final stage. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    *CONST_CAST(struct flowmap *, &subtable->index_maps[index])
 								        = miniflow_get_map_in_range(&mask->masks, prev, FLOW_U64S);
-												classifier: Remove redundant index.

The test for figuring out if the last index had the same fields as the
actual rules map was broken, resulting in keeping an unnecessary
index around.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-04 13:00:05 -07:00
+								    /* Check if the final stage adds any bits. */
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    if (index > 0) {
-												classifier: Remove redundant index.

The test for figuring out if the last index had the same fields as the
actual rules map was broken, resulting in keeping an unnecessary
index around.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-04 13:00:05 -07:00
+								        if (flowmap_is_empty(subtable->index_maps[index])) {
 								            /* Remove the last index, as it has the same fields as the rules
 								             * map. */
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								            --index;
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								            ccmap_destroy(&subtable->indices[index]);
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								        }
 								    }
-												classifier: Constify fields.

Some struct cls_match and cls_subtable fields were already documented
as being const.  Make them const and use CONST_CAST where appropriate
to initialize them.

This will help catch future errors modifying those fields after
initialization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-27 10:57:28 -07:00
+								    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    for (i = 0; i < cls->n_tries; i++) {
 								        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
 								                                                         cls->tries[i].field);
 								    }
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    /* Ports trie. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
-												classifier: Constify fields.

Some struct cls_match and cls_subtable fields were already documented
as being const.  Make them const and use CONST_CAST where appropriate
to initialize them.

This will help catch future errors modifying those fields after
initialization.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-10-27 10:57:28 -07:00
+								    *CONST_CAST(int *, &subtable->ports_mask_len)
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
+								        = 32 - ctz32(ntohl(miniflow_get_ports(&mask->masks)));
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    /* List of rules. */
 								    rculist_init(&subtable->rules_list);
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);
-												classifier: Use array for subtables instead of a list.

Using a linear array allows more efficient memory access for lookups.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								    return subtable;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												classifier: Clean up destroy_subtable.

Add asserts to make sure the containers within are already empty.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-03 11:23:11 -08:00
+								/* RCU readers may still access the subtable before it is actually freed. */
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								static void
-												lib/classifier: Unify struct classifier and cls_classifier.

Now that it is clear that struct cls_classifier itself does not
need RCU indirection and pvector is defined in its own header, it
is possible to get rid of the indirection from struct classifier to
struct cls_classifier.

Suggested-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								{
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    int i;
-												Revert "pvector: Expose non-concurrent priority vector."

This reverts commit 8bdfe1313894047d44349fa4cf4402970865950f.

I failed to see that lib/dpif-netdev.c actually needs the concurrency
provided by pvector prior to this change.  More specifically, when a
subtable is removed, concurrent lookups may skip over another subtable
swapped in to the place of the removed subtable in the vector.

Since this was the only use of the non-concurrent pvector, it is
cleaner to revert the whole patch.

Reported-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
											
										
										
											2016-08-10 14:58:51 -07:00
+								    pvector_remove(&cls->subtables, subtable);
-												classifier: Clean up destroy_subtable.

Add asserts to make sure the containers within are already empty.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-03 11:23:11 -08:00
+								    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
 								                minimask_hash(&subtable->mask, 0));
 								    ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
 								               == NULL);
 								    ovs_assert(cmap_is_empty(&subtable->rules));
-												classifier: Lockless and robust classifier iteration.

Previously, accurate iteration required writers to be excluded during
iteration.  This patch adds an rculist to struct cls_subtable, and a
corresponding list node to struct cls_rule, which makes iteration more
straightforward, and allows the iterators to remain ignorant of the
internals of the cls_match.  This new list allows iteration of rules
in the classifier by traversing the RCU-friendly subtables vector, and
the rculist of rules in each subtable.

Classifier modifications may be performed concurrently, but whether or
not the concurrent iterator sees those changes depends on the timing
of the change.  More specifically, a concurrent iterator:

- May or may not see a rule that is being inserted or removed.
- Will see either the new or the old version of a rule that is replaced.
- Will see all the other rules (that are not being modified).

Finally, the subtable's rculist also makes it possible to make
classifier_rule_overlaps() lockless, which this patch also does.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-13 11:54:31 -08:00
+								    ovs_assert(rculist_is_empty(&subtable->rules_list));
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    for (i = 0; i < subtable->n_indices; i++) {
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								        ccmap_destroy(&subtable->indices[i]);
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    }
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    cmap_destroy(&subtable->rules);
-												lib/pvector: Non-intrusive RCU priority vector.

Factor out the priority vector code from the classifier.

Making the classifier use RCU instead of locking requires parallel
access to the priority vector, pointing to subtables in descending
priority order.  When a new subtable is added, a new copy of the
priority vector is allocated, while the current readers can keep on
using the old copy they started with.  Adding and removing subtables
is usually less frequent than adding and removing rules, so this
should not have a visible performance implication.  As an optimization
for the userspace datapath use, where all the subtables have the same
priority, new subtables can be added to the end of the vector without
reallocation and without disturbing readers.

cls_subtables_reset() is now removed, as it served its purpose in bug
hunting.  Checks on the new pvector are now incorporated into
tests/test-classifier.c.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-26 07:41:25 -07:00
+								    ovsrcu_postpone(free, subtable);
-												classifier: Refactor table priority updates and tables_priority reordering.

I find this organization clearer.

CC: Jarno Rajahalme <jarno.rajahalme@nsn.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-02-11 13:11:42 -08:00
+								}
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								/* Return 'true' if can skip rest of the subtable based on the prefix trie
 								 * lookup results. */
 								static inline bool
 								check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
 								            const unsigned int field_plen[CLS_MAX_TRIES],
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								            const struct flowmap range_map, const struct flow *flow,
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            struct flow_wildcards *wc)
 								{
 								    int j;
 								    /* Check if we could avoid fully unwildcarding the next level of
 								     * fields using the prefix tries.  The trie checks are done only as
 								     * needed to avoid folding in additional bits to the wildcards mask. */
 								    for (j = 0; j < n_tries; j++) {
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        /* Is the trie field relevant for this subtable, and
 								           is the trie field within the current range of fields? */
 								        if (field_plen[j] &&
 								            flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            struct trie_ctx *ctx = &trie_ctx[j];
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
 								            /* On-demand trie lookup. */
 								            if (!ctx->lookup_done) {
 								                memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
 								                ctx->maskbits = trie_lookup(ctx->trie, flow, &ctx->match_plens);
 								                ctx->lookup_done = true;
 								            }
 								            /* Possible to skip the rest of the subtable if subtable's
 								             * prefix on the field is not included in the lookup result. */
 								            if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
 								                /* We want the trie lookup to never result in unwildcarding
 								                 * any bits that would not be unwildcarded otherwise.
 								                 * Since the trie is shared by the whole classifier, it is
 								                 * possible that the 'maskbits' contain bits that are
 								                 * irrelevant for the partition relevant for the current
 								                 * packet.  Hence the checks below. */
 								                /* Check that the trie result will not unwildcard more bits
 								                 * than this subtable would otherwise. */
 								                if (ctx->maskbits <= field_plen[j]) {
 								                    /* Unwildcard the bits and skip the rest. */
 								                    mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
 								                    /* Note: Prerequisite already unwildcarded, as the only
 								                     * prerequisite of the supported trie lookup fields is
 								                     * the ethertype, which is always unwildcarded. */
 								                    return true;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                /* Can skip if the field is already unwildcarded. */
 								                if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
 								                    return true;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                }
 								            }
 								        }
 								    }
 								    return false;
 								}
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								/* Returns true if 'target' satisfies 'flow'/'mask', that is, if each bit
 								 * that is set in 'mask' has the same value in 'target' as it has in
 								 * 'flow'.
 								 *
 								 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
-												lib/classifier: Optimize megaflows for single rule case.

When, during a classifier lookup, we narrow down to a single potential
rule, it is enough to match on ("unwildcard") one bit that differs
between the packet and the rule.

This is a special case of the more general algorithm, where it is
sufficient to match on enough bits that separates the packet from all
higher priority rules than the matched rule.  For a miss that would be
all the rules.  Implementing this is expensive for more than a few
rules.  This patch starts by doing this for a single rule when we
already have it, also reducing the lookup cost by finishing the lookup
earlier than before.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								 * target, mask) but this is faster because of the invariant that
 								 * flow->map and mask->masks.map are the same. */
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								static inline bool
 								miniflow_and_mask_matches_flow(const struct miniflow *flow,
 								                               const struct minimask *mask,
-												lib/classifier: Add miniflow_and_mask_matches_flow_wc.

miniflow_and_mask_matches_flow_wc() fills in the masks in flow
wildcards, so a separate step to that effect is no longer needed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								                               const struct flow *target)
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								{
-												flow: Make compile with MSVC.

MSVC does not like zero sized arrays in structs.  Hence, remove the
'values' member from struct miniflow and add back the getters
miniflow_values() and miniflow_get_values().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-16 17:42:24 -07:00
+								    const uint64_t *flowp = miniflow_get_values(flow);
 								    const uint64_t *maskp = miniflow_get_values(&mask->masks);
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    const uint64_t *target_u64 = (const uint64_t *)target;
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    map_t map;
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    FLOWMAP_FOR_EACH_MAP (map, mask->masks.map) {
 								        size_t idx;
 								        MAP_FOR_EACH_INDEX (idx, map) {
 								            if ((*flowp++ ^ target_u64[idx]) & *maskp++) {
 								                return false;
 								            }
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								        }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        target_u64 += MAP_T_BITS;
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								    }
 								    return true;
 								}
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								static inline const struct cls_match *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								find_match(const struct cls_subtable *subtable, ovs_version_t version,
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								           const struct flow *flow, uint32_t hash)
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								{
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    const struct cls_match *head, *rule;
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
 								        if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,
 								                                                      &subtable->mask,
 								                                                      flow))) {
 								            /* Return highest priority rule that is visible. */
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list a NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of the struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
+								            CLS_MATCH_FOR_EACH (rule, head) {
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								                if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) {
-												classifier: Add support for invisible flows.

This makes it possible to tentatively add flows to the classifier
without the datapath seeing them.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-05-29 11:28:38 -07:00
+								                    return rule;
 								                }
 								            }
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
 								    }
-												classifier: Optimize search of "catchall" table.

Most flow tables have some kind of "catchall" rule that matches every
packet.  For this table, the cost of copying, zeroing, and hashing the
input flow is significant.  This patch avoids these costs.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-09 15:49:22 -07:00
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    return NULL;
 								}
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								static const struct cls_match *
-												lib: Separate versioning to its own module.

Separate rule versioning to lib/versions.h to make it easier to use
versioning for other data types.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:01 -07:00
+								find_match_wc(const struct cls_subtable *subtable, ovs_version_t version,
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								              const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
 								              unsigned int n_tries, struct flow_wildcards *wc)
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								{
-												classifier: Use array for subtables instead of a list.

Using a linear array allows more efficient memory access for lookups.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								    if (OVS_UNLIKELY(!wc)) {
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								        return find_match(subtable, version, flow,
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								                          flow_hash_in_minimask(flow, &subtable->mask, 0));
 								    }
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    uint32_t basis = 0, hash;
 								    const struct cls_match *rule = NULL;
 								    struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER;
 								    unsigned int mask_offset = 0;
 								    int i;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    /* Try to finish early by checking fields in segments. */
 								    for (i = 0; i < subtable->n_indices; i++) {
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                        subtable->index_maps[i], flow, wc)) {
-												lib/classifier: Clarify find_match_wc().

Reduce the number of goto statements by returning via a new helper
fill_range_wc() when no match is found.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
+								            /* 'wc' bits for the trie field set, now unwildcard the preceding
 								             * bits used so far. */
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								            goto no_match;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
 								        /* Accumulate the map used so far. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        stages_map = flowmap_or(stages_map, subtable->index_maps[i]);
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
 								        hash = flow_hash_in_minimask_range(flow, &subtable->mask,
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                           subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                           &mask_offset, &basis);
-												classifier: Use ccmaps for staged lookup indices.

Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya <alok-kumar.maurya@hpe.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>
											
										
										
											2016-04-22 19:40:09 -07:00
+								        if (!ccmap_find(&subtable->indices[i], hash)) {
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								            goto no_match;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								        }
 								    }
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    /* Trie check for the final range. */
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                    subtable->index_maps[i], flow, wc)) {
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        goto no_match;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    hash = flow_hash_in_minimask_range(flow, &subtable->mask,
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                       subtable->index_maps[i],
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								                                       &mask_offset, &basis);
-												classifier: Support table versioning

This patch allows classifier rules to become visible and invisible in
specific versions.  A 'version' is defined as a positive monotonically
increasing integer, which never wraps around.

The new 'visibility' attribute replaces the prior 'to_be_removed' and
'visible' attributes.

When versioning is not used, the 'version' parameter should be passed
as 'CLS_MIN_VERSION' when creating rules, and 'CLS_MAX_VERSION' when
looking up flows.

This feature enables the support for atomic OpenFlow bundles without
significant performance penalty on 64-bit systems. There is a
performance decrease in 32-bit systems due to 64-bit atomics used.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-06-09 17:00:00 -07:00
+								    rule = find_match(subtable, version, flow, hash);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    if (!rule && subtable->ports_mask_len) {
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        /* The final stage had ports, but there was no match.  Instead of
 								         * unwildcarding all the ports bits, use the ports trie to figure out a
 								         * smaller set of bits to unwildcard. */
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								        unsigned int mbits;
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								        ovs_be32 value, plens, mask;
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												flow: Improve type-safety of MINIFLOW_GET_TYPE.

Until now, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Armando Migliaccio <armamig@gmail.com>

											
										
										
											2018-03-19 21:34:26 -07:00
+								        mask = miniflow_get_ports(&subtable->mask.masks);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
 								        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
-												util: Add be32_prefix_mask().

Shifting a 32-bit entity by 32 bits is undefined behavior.  As we have 3
cases where we may hit this, it is a time to introduce a helper for
this.

VMware-BZ: #1355026
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-11-11 15:50:51 -08:00
+								            mask & be32_prefix_mask(mbits);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								        goto no_match;
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    }
-												lib/classifier: Add miniflow_and_mask_matches_flow_wc.

miniflow_and_mask_matches_flow_wc() fills in the masks in flow
wildcards, so a separate step to that effect is no longer needed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-06-13 10:38:05 -07:00
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    /* Must unwildcard all the fields, as they were looked at. */
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								    flow_wildcards_fold_minimask(wc, &subtable->mask);
 								    return rule;
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
 								no_match:
 								    /* Unwildcard the bits in stages so far, as they were used in determining
 								     * there is no match. */
-												flow: Add struct flowmap.

Struct miniflow is now sometimes used just as a map.  Define a new
struct flowmap for that purpose.  The flowmap is defined as an array of
maps, and it is automatically sized according to the size of struct
flow, so it will be easier to maintain in the future.

It would have been tempting to use the existing struct bitmap for this
purpose. The main reason this is not feasible at the moment is that
some flowmap algorithms are simpler when it can be assumed that no
struct flow member requires more bits than can fit to a single map
unit. The tunnel member already requires more than 32 bits, so the map
unit needs to be 64 bits wide.

Performance critical algorithms enumerate the flowmap array units
explicitly, as it is easier for the compiler to optimize, compared to
the normal iterator.  Without this optimization a classifier lookup
without wildcard masks would be about 25% slower.

With this more general (and maintainable) algorithm the classifier
lookups are about 5% slower, when the struct flow actually becomes big
enough to require a second map.  This negates the performance gained
in the "Pre-compute stage masks" patch earlier in the series.

Requested-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map);
-												classifier: Pre-compute stage masks.

This makes stage mask computation happen only when a subtable is
inserted and allows simplification of the main lookup function.

Classifier benchmark shows that this speeds up the classification
(with wildcards) about 5%.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-08-25 13:55:03 -07:00
+								    return NULL;
-												Classifier: Staged subtable matching.

Subtable lookup is performed in ranges defined for struct flow,
starting from metadata (registers, in_port, etc.), then L2 header, L3,
and finally L4 ports.  Whenever it is found that there are no matches
in the current subtable, the rest of the subtable can be skipped.  The
rationale of this logic is that as many fields as possible can remain
wildcarded.


Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2013-11-19 17:31:29 -08:00
+								}
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								static struct cls_match *
-												classifier: Constify RCU pointers.

Returning const struct cls_rule pointers from the classifier API helps
callers to remember that they should not modify the rules returned.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-11-06 14:55:29 -08:00
+								find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
-												classifier: Rename struct cls_table as cls_subtable.

The naming of the classifier table has been a source of confusion,
since each OpenFlow table is implemented as a classifier, which
consists of multiple (sub)tables.  This name change hopefully makes
classifier related discussion a bit less confusing.

For consistency, relevant field names as well as the function and
variable names have been renamed in similar fashion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 16:39:52 -07:00
+								           uint32_t hash)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												lib/classifier: Separate cls_rule internals from the API.

Keep an internal representation of a rule separate from the one
embedded into user's structs.  This allows for further memory
optimization in the classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:38 -07:00
+								    struct cls_match *head;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												lib/classifier: Use cmap.

Use cmap instead of hmap & hindex in classifier.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:07 -07:00
+								    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
-												lib/classifier: Support variable sized miniflows.

Change the classifier to allocate variable sized miniflows and
minimasks in cls_match and cls_subtable, respectively.  Do not
duplicate the mask in cls_rule any more.

miniflow_clone and miniflow_move can now take variably sized miniflows
as source.  The destination is assumed to be regularly sized miniflow.

Inlining miniflow and mask values reduces memory indirection and helps
reduce cache misses.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-29 15:50:39 -07:00
+								        if (miniflow_equal(&head->flow, flow)) {
-												classifier: Rewrite.

The old classifier was not adaptive: it required knowing the structure of
the flows that were likely to be in use to get good performance.  It is
likely that it degenerated to linear search in any real-world case.

This new classifier is adaptive and should perform better in the real
world.

											
										
										
											2010-11-03 11:00:58 -07:00
+								            return head;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
 								    }
 								    return NULL;
 								}
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
 								/* A longest-prefix match tree. */
 								/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
 								 * Prefixes are in the network byte order, and the offset 0 corresponds to
 								 * the most significant bit of the first byte.  The offset can be read as
 								 * "how many bits to skip from the start of the prefix starting at 'pr'".
 								 *
 								 * The bits are returned left-aligned (most significant bit first) in host
 								 * byte order and may be followed by unwanted bits.  The return value holds
 								 * at most 32 bits, so a 'plen' greater than 32 yields only the first 32
 								 * bits. */
 								static uint32_t
 								raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
 								{
 								    uint32_t prefix;
 								    pr += ofs / 32; /* Where to start. */
 								    ofs %= 32;      /* How many bits to skip at 'pr'. */
 								    prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */
 								    /* Need more than we have already?  With 'ofs' == 0 the first word already
 								     * fills the whole result, and shifting the next word right by 32 would
 								     * be undefined behavior, so the next word is read only for nonzero
 								     * 'ofs'. */
 								    if (ofs && plen > 32 - ofs) {
 								        prefix |= ntohl(*++pr) >> (32 - ofs);
 								    }
 								    /* Return with possible unwanted bits at the end. */
 								    return prefix;
 								}
 								/* Extract up to TRIE_PREFIX_BITS bits of the network byte order prefix 'pr',
 								 * skipping the first 'ofs' bits (offset 0 is the most significant bit of the
 								 * first byte).  The number of bits returned is min(TRIE_PREFIX_BITS, plen);
 								 * they are left-aligned in the result and every bit after them is zero.
 								 * Returns 0 when 'plen' is 0. */
 								static uint32_t
 								trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
 								{
 								    unsigned int n_wanted;
 								    uint32_t keep_mask;
 								    if (plen == 0) {
 								        return 0;
 								    }
 								    /* Never hand out more than TRIE_PREFIX_BITS bits. */
 								    n_wanted = plen > TRIE_PREFIX_BITS ? TRIE_PREFIX_BITS : plen;
 								    /* Mask that keeps only the 'n_wanted' most significant bits. */
 								    keep_mask = ~0u << (32 - n_wanted);
 								    return raw_get_prefix(pr, ofs, n_wanted) & keep_mask;
 								}
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								/* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								 * starting at "MSB 0"-based offset 'ofs'. */
 								static unsigned int
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                  unsigned int ofs)
 								{
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    /* Set the bit after the relevant bits to limit the result. */
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								/* Return the number of equal bits in 'node' prefix and a 'prefix' of length
 								 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
 								static unsigned int
 								trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
 								                       unsigned int ofs, unsigned int plen)
 								{
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                             prefix, ofs);
 								}
 								/* Fetch a single bit from the big-endian bit string 'value'.  'ofs' counts
 								 * bits from the most significant bit of the first byte ("MSB 0" numbering)
 								 * and may exceed 31, in which case later bytes of 'value' are read.  The
 								 * result is 0 or 1. */
 								static unsigned int
 								be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
 								{
 								    const uint8_t *bytes = (const uint8_t *) value;
 								    unsigned int byte_idx = ofs / 8;
 								    unsigned int shift = 7 - ofs % 8; /* Distance from the byte's LSB. */
 								    return (bytes[byte_idx] >> shift) & 1u;
 								}
 								/* Fetch a single bit from the host byte order word 'prefix'.  'ofs' uses
 								 * "MSB 0" numbering: 0 selects the most significant bit and 31 the least
 								 * significant one.  'ofs' must be in the range 0...31.  The result is 0 or
 								 * 1. */
 								static unsigned int
 								get_bit_at(const uint32_t prefix, unsigned int ofs)
 								{
 								    const unsigned int shift = 31 - ofs; /* Distance from the LSB. */
 								    return (prefix >> shift) & 1u;
 								}
 								/* Create new branch. */
 								static struct trie_node *
 								trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
 								                   unsigned int n_rules)
 								{
 								    struct trie_node *node = xmalloc(sizeof *node);
 								    node->prefix = trie_get_prefix(prefix, ofs, plen);
 								    if (plen <= TRIE_PREFIX_BITS) {
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        node->n_bits = plen;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        ovsrcu_set_hidden(&node->edges[0], NULL);
 								        ovsrcu_set_hidden(&node->edges[1], NULL);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        node->n_rules = n_rules;
 								    } else { /* Need intermediate nodes. */
 								        struct trie_node *subnode = trie_branch_create(prefix,
 								                                                       ofs + TRIE_PREFIX_BITS,
 								                                                       plen - TRIE_PREFIX_BITS,
 								                                                       n_rules);
 								        int bit = get_bit_at(subnode->prefix, 0);
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        node->n_bits = TRIE_PREFIX_BITS;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        ovsrcu_set_hidden(&node->edges[bit], subnode);
 								        ovsrcu_set_hidden(&node->edges[!bit], NULL);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        node->n_rules = 0;
 								    }
 								    return node;
 								}
 								static void
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								trie_node_destroy(const struct trie_node *node)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node));
 								}
 								/* Copy a trie node for modification and postpone delete the old one. */
 								static struct trie_node *
 								trie_node_rcu_realloc(const struct trie_node *node)
 								{
 								    struct trie_node *new_node = xmalloc(sizeof *node);
 								    *new_node = *node;
 								    trie_node_destroy(node);
 								    return new_node;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								static void
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								trie_destroy(rcu_trie_ptr *trie)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    if (node) {
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        ovsrcu_set_hidden(trie, NULL);
 								        trie_destroy(&node->edges[0]);
 								        trie_destroy(&node->edges[1]);
 								        trie_node_destroy(node);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
 								}
 								static bool
 								trie_is_leaf(const struct trie_node *trie)
 								{
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    /* No children? */
 								    return !ovsrcu_get(struct trie_node *, &trie->edges[0])
 								        && !ovsrcu_get(struct trie_node *, &trie->edges[1]);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								static void
 								mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                     unsigned int n_bits)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
 								    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
 								    unsigned int i;
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    for (i = 0; i < n_bits / 32; i++) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        mask[i] = OVS_BE32_MAX;
 								    }
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    if (n_bits % 32) {
 								        mask[i] |= htonl(~0u << (32 - n_bits % 32));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
 								}
 								static bool
 								mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                     unsigned int n_bits)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
 								    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
 								    unsigned int i;
 								    ovs_be32 zeroes = 0;
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    for (i = 0; i < n_bits / 32; i++) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        zeroes |= ~mask[i];
 								    }
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    if (n_bits % 32) {
 								        zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    return !zeroes; /* All 'n_bits' bits set. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								static rcu_trie_ptr *
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								trie_next_edge(struct trie_node *node, const ovs_be32 value[],
 								               unsigned int ofs)
 								{
 								    return node->edges + be_get_bit_at(value, ofs);
 								}
 								static const struct trie_node *
 								trie_next_node(const struct trie_node *node, const ovs_be32 value[],
 								               unsigned int ofs)
 								{
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    return ovsrcu_get(struct trie_node *,
 								                      &node->edges[be_get_bit_at(value, ofs)]);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								/* Sets the bit at ("MSB 0"-based) offset 'ofs' in 'value'.  'ofs' can be
 								 * greater than 31, in which case the bit lands in a later element of
 								 * 'value'.
 								 *
 								 * 'value' is addressed as a byte array: byte 'ofs / 8' is selected and
 								 * bit '7 - ofs % 8' within it is set, so offset 0 is the most significant
 								 * bit of the first byte.  No bounds check is made; the caller must ensure
 								 * that 'value' covers at least 'ofs + 1' bits. */
 								static void
 								be_set_bit_at(ovs_be32 value[], unsigned int ofs)
 								{
 								    ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8);
 								}
 								/* Returns the number of bits in the prefix mask necessary to determine a
 								 * mismatch, in case there are longer prefixes in the tree below the one that
 								 * matched.
 								 * '*plens' will have a bit set for each prefix length that may have matching
 								 * rules.  The caller is responsible for clearing the '*plens' prior to
 								 * calling this.
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								 */
 								static unsigned int
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								                  ovs_be32 plens[], unsigned int n_bits)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
 								    const struct trie_node *prev = NULL;
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
 								    unsigned int match_len = 0; /* Number of matching bits. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												lib/classifier: Change local variable names.

These stylistic changes make the following patch a bit simpler.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    for (; node; prev = node, node = trie_next_node(node, value, match_len)) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        unsigned int eqbits;
 								        /* Check if this edge can be followed. */
-												lib/classifier: Change local variable names.

These stylistic changes make the following patch a bit simpler.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-07-18 02:24:26 -07:00
+								        eqbits = prefix_equal_bits(node->prefix, node->n_bits, value,
 								                                   match_len);
 								        match_len += eqbits;
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
-												lib/classifier: Change local variable names.

These stylistic changes make the following patch a bit simpler.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-07-18 02:24:26 -07:00
+								            /* Bit at offset 'match_len' differed. */
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								            return match_len + 1; /* Includes the first mismatching bit. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
 								        /* Full match, check if rules exist at this prefix length. */
 								        if (node->n_rules > 0) {
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								            be_set_bit_at(plens, match_len - 1);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
-												lib/classifier: Change local variable names.

These stylistic changes make the following patch a bit simpler.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-07-18 02:24:26 -07:00
+								        if (match_len >= n_bits) {
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								            return n_bits; /* Full prefix. */
-												lib/classifier: Fix use of uninitialized memory.

When reaching the end of a prefix trie, we checked one bit past the end
of the intended data.  However, since the trie node in that case has
NULLs for both edge links, this did not result in incorrect
functionality.

Found via check-valgrind.

Reported-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-13 14:52:59 -07:00
+								        }
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    /* node == NULL.  Full match so far, but we tried to follow an
 								     * non-existing branch.  Need to exclude the other branch if it exists
 								     * (it does not if we were called on an empty trie or 'prev' is a leaf
 								     * node). */
 								    return !prev || trie_is_leaf(prev) ? match_len : match_len + 1;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								static unsigned int
 								trie_lookup(const struct cls_trie *trie, const struct flow *flow,
-												classifier: Do not use mf_value.

mf_value has grown bigger than needed for storing the biggest
supported prefix (IPv6 address length).  Define a new type to be used
instead of mf_value.

This makes classifier lookups a bit faster.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-12 17:03:07 -07:00
+								            union trie_prefix *plens)
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								{
 								    const struct mf_field *mf = trie->field;
 								    /* Check that current flow matches the prerequisites for the trie
 								     * field.  Some match fields are used for multiple purposes, so we
 								     * must check that the trie is relevant for this flow. */
-												meta-flow: Clean up masking with prerequisities checking.

Change mf_are_prereqs_ok() to take a flow_wildcards pointer, so that the
wildcards can be set at the same time as the prerequisites are
checked.  This makes it easier to write more obviously correct code.

Remove the functions mf_mask_field_and_prereqs() and
mf_mask_field_and_prereqs__(), and make the callers first check the
prerequisites, while supplying 'wc' to mf_are_prereqs_ok(), and if
successful, mask the bits of the field that were read or set using
mf_mask_field_masked().

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-29 16:52:03 -07:00
+								    if (mf_are_prereqs_ok(mf, flow, NULL)) {
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        return trie_lookup_value(&trie->root,
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                                 &((ovs_be32 *)flow)[mf->flow_be32ofs],
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								                                 &plens->be32, mf->n_bits);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    }
-												lib/classifier: Return all matching prefix lengths from trie lookup.

Previously we only returned the last matching prefix length
encountered during a trie lookup, and skipped subtables that had
prefixes longer than that.  This patch changes the trie lookup
functions to return all matching prefix lengths seen, so that all
non-matching prefix lengths can be skipped.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-18 02:24:26 -07:00
+								    memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
 								    return 0; /* Value not used in this case. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								/* Returns the length of the prefix match mask for the field 'mf' in
 								 * 'minimask'.  Returns 0 if the mask for 'mf' is not a contiguous (CIDR)
 								 * prefix. */
 								static unsigned int
 								minimask_get_prefix_len(const struct minimask *minimask,
 								                        const struct mf_field *mf)
 								{
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								    uint8_t be32_ofs = mf->flow_be32ofs;
 								    uint8_t be32_end = be32_ofs + mf->n_bytes / 4;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								    for (; be32_ofs < be32_end; ++be32_ofs) {
 								        uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
 								        /* Validate mask, count the mask length. */
 								        if (mask_tz) {
 								            if (mask) {
 								                return 0; /* No bits allowed after mask ended. */
 								            }
 								        } else {
 								            if (~mask & (~mask + 1)) {
 								                return 0; /* Mask not contiguous. */
 								            }
 								            mask_tz = ctz32(mask);
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            n_bits += 32 - mask_tz;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								        }
 								    }
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    return n_bits;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								/*
 								 * This is called only when mask prefix is known to be CIDR and non-zero.
 								 * Relies on the fact that the flow and mask have the same map, and since
 								 * the mask is CIDR, the storage for the flow field exists even if it
 								 * happened to be zeros.
 								 */
 								static const ovs_be32 *
 								minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
 								{
-												flow: Split miniflow's map.

Use two maps in miniflow to allow for expansion of struct flow past
512 bytes.  We now have one map for tunnel related fields, and another
for the rest of the packet metadata and actual packet header fields.
This split has the benefit that for non-tunneled packets the overhead
should be minimal.

Some miniflow utilities now exist in two variants, new ones operating
over all the data, and the old ones operating only on a single 64-bit
map at a time.  The old ones require doubling of code but should
execute faster, so those are used in the datapath and classifier's
lookup path.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 15:18:43 -07:00
+								    size_t u64_ofs = mf->flow_be32ofs / 2;
 								    return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
-												miniflow: Use 64-bit data.

So far the compressed flow data in struct miniflow has been in 32-bit
words with a 63-bit map, allowing for a maximum size of struct flow of
252 bytes.  With the forthcoming Geneve options this is not sufficient
any more.

This patch solves the problem by changing the miniflow data to 64-bit
words, doubling the flow max size to 504 bytes.  Since the word size
is doubled, there is some loss in compression efficiency.  To counter
this some of the flow fields have been reordered to keep related
fields together (e.g., the source and destination IP addresses share
the same 64-bit word).

This change should speed up flow data processing on 64-bit CPUs, which
may help counterbalance the impact of making the struct flow bigger in
the future.

Classifier lookup stage boundaries are also changed to 64-bit
alignment, as the current algorithm depends on each miniflow word to
not be split between ranges.  This has resulted in new padding (part
of the 'mpls_lse' field).

The 'dp_hash' field is also moved to packet metadata to eliminate
otherwise needed padding there.  This allows the L4 to fit into one
64-bit word, and also makes matches on 'dp_hash' more efficient as
misses can be found already on stage 1.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-01-06 11:10:42 -08:00
+								        + (mf->flow_be32ofs & 1);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								/* Insert rule into the prefix tree.
 								 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 								 * in 'rule'. */
 								static void
 								trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
 								{
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    trie_insert_prefix(&trie->root,
 								                       minimatch_get_prefix(&rule->match, trie->field), mlen);
 								}
 								static void
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen)
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								{
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    struct trie_node *node;
 								    int ofs = 0;
 								    /* Walk the tree. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    for (; (node = ovsrcu_get_protected(struct trie_node *, edge));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								         edge = trie_next_edge(node, prefix, ofs)) {
 								        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
 								        ofs += eqbits;
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        if (eqbits < node->n_bits) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            /* Mismatch, new node needs to be inserted above. */
 								            int old_branch = get_bit_at(node->prefix, eqbits);
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            struct trie_node *new_parent;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits,
 								                                            ofs == mlen ? 1 : 0);
 								            /* Copy the node to modify it. */
 								            node = trie_node_rcu_realloc(node);
 								            /* Adjust the new node for its new position in the tree. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            node->prefix <<= eqbits;
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            node->n_bits -= eqbits;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            ovsrcu_set_hidden(&new_parent->edges[old_branch], node);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
 								            /* Check if need a new branch for the new rule. */
 								            if (ofs < mlen) {
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                ovsrcu_set_hidden(&new_parent->edges[!old_branch],
 								                                  trie_branch_create(prefix, ofs, mlen - ofs,
 ));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            ovsrcu_set(edge, new_parent); /* Publish changes. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            return;
 								        }
 								        /* Full match so far. */
 								        if (ofs == mlen) {
 								            /* Full match at the current node, rule needs to be added here. */
 								            node->n_rules++;
 								            return;
 								        }
 								    }
 								    /* Must insert a new tree branch for the new rule. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								}
 								/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 								 * in 'rule'. */
 								static void
 								trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
 								{
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    trie_remove_prefix(&trie->root,
 								                       minimatch_get_prefix(&rule->match, trie->field), mlen);
 								}
 								/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 								 * in 'rule'. */
 								static void
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen)
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								{
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    struct trie_node *node;
-												classifier: Do not use mf_value.

mf_value has grown bigger than needed for storing the biggest
supported prefix (IPv6 address length).  Define a new type to be used
instead of mf_value.

This makes classifier lookups a bit faster.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-12 17:03:07 -07:00
+								    rcu_trie_ptr *edges[sizeof(union trie_prefix) * CHAR_BIT];
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    int depth = 0, ofs = 0;
 								    /* Walk the tree. */
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
+								    for (edges[0] = root;
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								         (node = ovsrcu_get_protected(struct trie_node *, edges[depth]));
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
 								        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
-												lib/classifier: Use a prefix tree to optimize ports wildcarding.

Using a prefix tree (aka 'trie') for transport ports matching produces
less specific (more wildcarded) datapath megaflows.

Each subtable that matches on transport ports has its own ports trie.
This trie is consulted only after a failing lookup to determine the
number of bits that need to be unwildcarded to guarantee that any
packet that should match on any of the other rules will not match this
megaflow.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-30 14:09:08 -07:00
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								        if (eqbits < node->n_bits) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            /* Mismatch, nothing to be removed.  This should never happen, as
 								             * only rules in the classifier are ever removed. */
 								            break; /* Log a warning. */
 								        }
 								        /* Full match so far. */
 								        ofs += eqbits;
 								        if (ofs == mlen) {
 								            /* Full prefix match at the current node, remove rule here. */
 								            if (!node->n_rules) {
 								                break; /* Log a warning. */
 								            }
 								            node->n_rules--;
 								            /* Check if can prune the tree. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								            while (!node->n_rules) {
 								                struct trie_node *next,
 								                    *edge0 = ovsrcu_get_protected(struct trie_node *,
 								                                                  &node->edges[0]),
 								                    *edge1 = ovsrcu_get_protected(struct trie_node *,
 								                                                  &node->edges[1]);
 								                if (edge0 && edge1) {
 								                    break; /* A branching point, cannot prune. */
 								                }
 								                /* Else have at most one child node, remove this node. */
 								                next = edge0 ? edge0 : edge1;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
 								                if (next) {
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                    if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven to the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                        break;   /* Cannot combine. */
 								                    }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                    next = trie_node_rcu_realloc(next); /* Modify. */
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                    /* Combine node with next. */
-												lib/classifier: Stylistic change.

Rename 'nbits' as 'n_bits' to be more consistent with other count-like
fields.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                    next->prefix = node->prefix | next->prefix >> node->n_bits;
 								                    next->n_bits += node->n_bits;
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                }
 								                /* Update the parent's edge. */
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                ovsrcu_set(edges[depth], next); /* Publish changes. */
 								                trie_node_destroy(node);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                if (next || !depth) {
 								                    /* Branch not pruned or at root, nothing more to do. */
 								                    break;
 								                }
-												lib/classifier: RCUify prefix trie code.

cls_set_prefix_fields() now synchronizes explicitly with the readers,
waiting for them to finish using the old configuration before changing to
the new configuration.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								                node = ovsrcu_get_protected(struct trie_node *,
 								                                            edges[--depth]);
-												Classifier: Track address prefixes.

Add a prefix tree (trie) structure for tracking the used address
space, enabling skipping classifier tables containing longer masks
than necessary for an address field value in a packet header being
classified.  This enables less unwildcarding for datapath flows in
parts of the address space without host routes.

Trie lookup is interwoven with the staged lookup, so that a trie is
searched only when the configured trie field becomes relevant
for the lookup.  The trie lookup results are retained so that each
trie is checked at most once for each classifier lookup.

This implementation tracks the number of rules at each address prefix
for the whole classifier.  More aggressive table skipping would be
possible by maintaining lists of tables that have prefixes at the
lengths encountered on tree traversal, or by maintaining separate
tries for subsets of rules separated by metadata fields.

Prefix tracking is configured via OVSDB.  A new column "prefixes" is
added to the database table "Flow_Table".  "prefixes" is a set of
string values listing the field names for which prefix lookup should
be used.

As of now, the fields for which prefix lookup can be enabled are:
- tun_id, tun_src, tun_dst
- nw_src, nw_dst (or aliases ip_src and ip_dst)
- ipv6_src, ipv6_dst

There is a maximum number of fields that can be enabled for any one
flow table.  Currently this limit is 3.

Examples:

ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \
 --id=@N1 create Flow_Table name=table0
ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \
 --id=@N1 create Flow_Table name=table1

ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src
ovs-vsctl set Flow_Table table1 prefixes=[]

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            }
 								            return;
 								        }
 								    }
 								    /* Cannot go deeper. This should never happen, since only rules
 								     * that actually exist in the classifier are ever removed. */
 								}
-												classifier: Make traversing identical rules robust.

The traversal of the list of identical rules from the lookup threads
is fragile if the list head is removed during the list traversal.

This patch simplifies the implementation of that list by making the
list a NULL-terminated, singly linked, RCU-protected list.  By having the
NULL at the end there is no longer a possibility of missing the point
when the list wraps around.  This is significant when there can be
multiple elements with the same priority in the list.

This change also decreases the size of struct cls_match back to its
pre-'visibility' attribute size.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-11 15:53:42 -07:00
 								/* Recognizable poison pointer value: UINTPTR_MAX / 0xf * 0xb evaluates to
 								 * an address whose every hex digit is 0xb (0xbbbb...). */
 								#define CLS_MATCH_POISON (struct cls_match *)(UINTPTR_MAX / 0xf * 0xb)
 								/* Overwrites 'rule->next' with CLS_MATCH_POISON and then frees 'rule'.
 								 *
 								 * NOTE(review): the "_cb" suffix suggests this is meant to run as a
 								 * deferred callback once no reader can still hold 'rule' -- confirm
 								 * against the callers. */
 								void
 								cls_match_free_cb(struct cls_match *rule)
 								{
 								    /* Poison the link before freeing, presumably so that a stale traversal
 								     * through 'next' lands on a recognizable bad address -- TODO confirm. */
 								    ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON);
 								    free(rule);
 								}