ovs/lib/dpif-netdev.c

/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "dpif-netdev.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "classifier.h"
#include "cmap.h"
#include "csum.h"
#include "dpif.h"
#include "dpif-provider.h"
#include "dummy.h"
#include "dynamic-string.h"
#include "fat-rwlock.h"
#include "flow.h"
#include "cmap.h"
#include "latch.h"
#include "list.h"
#include "meta-flow.h"
#include "netdev.h"
#include "netdev-dpdk.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-execute.h"
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
#include "ovs-numa.h"
#include "ovs-rcu.h"
#include "packet-dpif.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"
#include "seq.h"
#include "shash.h"
#include "sset.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
#include "vlog.h"

/* Registers this file as the "dpif_netdev" logging module for VLOG_*(). */
VLOG_DEFINE_THIS_MODULE(dpif_netdev);

/* By default, choose a priority in the middle. */
#define NETDEV_RULE_PRIORITY 0x8000

/* NOTE(review): presumably the maximum number of flows returned per flow dump
 * batch; its use is not visible in this part of the file. */
#define FLOW_DUMP_MAX_BATCH 50
/* Use per thread recirc_depth to prevent recirculation loop. */
#define MAX_RECIRC_DEPTH 5
/* Per-thread 'recirc_depth' counter, initially 0. */
DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)

/* Configuration parameters. */
enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */

/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;

/* Contains all 'struct dp_netdev's. */
static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
    = SHASH_INITIALIZER(&dp_netdevs);

/* Log rate limiter.  NOTE(review): the name suggests it throttles
 * upcall-related messages; its users are not visible in this part of the
 * file. */
static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);

/* Stores a miniflow with inline values. */

/* There are fields in the flow structure that we never use.  Therefore we can
 * save a few words of memory: the buffer size is the full flow size
 * (FLOW_U32S) minus MINI_N_INLINE and minus the sizes of the 'regs' and
 * 'metadata' fields. */
#define NETDEV_KEY_BUF_SIZE_U32 (FLOW_U32S                 \
                                 - MINI_N_INLINE           \
                                 - FLOW_U32_SIZE(regs)     \
                                 - FLOW_U32_SIZE(metadata) \
                                )
/* A miniflow ('flow') together with the 'buf' array that emc_cache_init()
 * hands to miniflow_initialize() alongside it. */
struct netdev_flow_key {
    struct miniflow flow;
    uint32_t buf[NETDEV_KEY_BUF_SIZE_U32];
};

/* Exact match cache for frequently used flows
 *
 * The cache uses a 32-bit hash of the packet (which can be the RSS hash) to
 * search its entries for a miniflow that matches exactly the miniflow of the
 * packet. Each entry stores the 'dp_netdev_flow' that matches the miniflow.
 *
 * A cache entry holds a reference to its 'dp_netdev_flow'.
 *
 * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS different
 * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values (each of
 * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown away). Each
 * value is the index of a cache entry where the miniflow could be.
 *
 *
 * Thread-safety
 * =============
 *
 * Each pmd_thread has its own private exact match cache.
 * If dp_netdev_input is not called from a pmd thread, a mutex is used.
 */

/* Number of hash bits used to index the cache. */
#define EM_FLOW_HASH_SHIFT 10
/* Number of entries in the cache: 2**EM_FLOW_HASH_SHIFT. */
#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)
/* Mask that reduces a hash to a valid index into the cache. */
#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
/* Number of EM_FLOW_HASH_SHIFT-bit segments of the hash that are tried, i.e.
 * the number of candidate positions for one hash. */
#define EM_FLOW_HASH_SEGS 2

/* One exact match cache entry. */
struct emc_entry {
    uint32_t hash;                  /* Set to 0 by emc_cache_init(). */
    uint32_t mf_len;                /* Set to 0 by emc_cache_init(). */
    struct netdev_flow_key mf;      /* Miniflow plus its value buffer. */
    struct dp_netdev_flow *flow;    /* Cached flow, NULL when unused. */
};

/* An exact match cache: a fixed-size array of entries indexed by hash bits. */
struct emc_cache {
    struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
};

/* Iterate in the exact match cache through every entry that might contain a
 * miniflow with hash 'HASH'.
 *
 * On each iteration CURRENT_ENTRY points at the entry indexed by the low
 * EM_FLOW_HASH_SHIFT bits of the remaining hash; the hash is then shifted
 * right by EM_FLOW_HASH_SHIFT for the next candidate.  HASH is evaluated
 * once.
 *
 * CURRENT_ENTRY is assigned before the iteration count is tested, so it is
 * assigned one more time when the loop runs to completion: after the loop it
 * is never NULL and cannot be used to tell whether the body executed a
 * 'break'. */
#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH)                 \
    for (uint32_t i__ = 0, srch_hash__ = (HASH);                             \
         (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \
         i__ < EM_FLOW_HASH_SEGS;                                            \
         i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)

/* Datapath based on the network device interface from netdev.h.
 *
 *
 * Thread-safety
 * =============
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 *
 * Acquisition order is, from outermost to innermost:
 *
 *    dp_netdev_mutex (global)
 *    port_mutex
 *    flow_mutex
 */
struct dp_netdev {
    const struct dpif_class *const class;   /* Set once at creation. */
    const char *const name;     /* Owned copy; key in 'dp_netdevs'. */
    struct dpif *dpif;          /* Most recently opened dpif for this dp. */
    struct ovs_refcount ref_cnt;    /* 'dp' is freed when this drops to 0. */
    atomic_flag destroyed;      /* Set by the first dpif_netdev_destroy(). */

    /* Flows.
     *
     * Writers of 'flow_table' must take the 'flow_mutex'.  Corresponding
     * changes to 'cls' must be made while still holding the 'flow_mutex'.
     */
    struct ovs_mutex flow_mutex;
    struct classifier cls;
    struct cmap flow_table OVS_GUARDED; /* Flow table. */

    /* Statistics.
     *
     * ovsthread_stats is internally synchronized. */
    struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */

    /* Ports.
     *
     * Protected by RCU.  Take the mutex to add or remove ports. */
    struct ovs_mutex port_mutex;
    struct cmap ports;          /* 'struct dp_netdev_port's by port number. */
    struct seq *port_seq;       /* Incremented whenever a port changes. */

    /* Protects access to ofproto-dpif-upcall interface during revalidator
     * thread synchronization. */
    struct fat_rwlock upcall_rwlock;
    upcall_callback *upcall_cb;  /* Callback function for executing upcalls. */
    void *upcall_aux;           /* Initialized to NULL at creation. */

    /* Stores all 'struct dp_netdev_pmd_thread's. */
    struct cmap poll_threads;

    /* Protects the access of the 'struct dp_netdev_pmd_thread'
     * instance for non-pmd thread. */
    struct ovs_mutex non_pmd_mutex;

    /* Each pmd thread will store its pointer to
     * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */
    ovsthread_key_t per_pmd_key;
};

/* Forward declaration; defined further down in this file.  Returns the port
 * in 'dp' numbered as given, or NULL if there is none. */
static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                    odp_port_t);

/* Indexes into the 'n' array of 'struct dp_netdev_stats'. */
enum dp_stat_type {
    DP_STAT_HIT,                /* Packets that matched in the flow table. */
    DP_STAT_MISS,               /* Packets that did not match. */
    DP_STAT_LOST,               /* Packets not passed up to the client. */
    DP_N_STATS                  /* Number of counters; not a counter. */
};

/* Contained by struct dp_netdev's 'stats' member.
 *
 * One bucket of datapath counters; dpif_netdev_get_stats() sums the counters
 * of all buckets, taking each bucket's 'mutex' while reading it. */
struct dp_netdev_stats {
    struct ovs_mutex mutex;          /* Protects 'n'. */

    /* Indexed by DP_STAT_*, protected by 'mutex'. */
    unsigned long long int n[DP_N_STATS] OVS_GUARDED;
};


/* A port in a netdev-based datapath.
 *
 * Created by do_add_port() and destroyed by port_destroy__() once the last
 * reference is dropped through port_unref(). */
struct dp_netdev_port {
    struct cmap_node node;      /* Node in dp_netdev's 'ports'. */
    odp_port_t port_no;         /* Datapath port number. */
    struct netdev *netdev;      /* Underlying network device. */
    struct netdev_saved_flags *sf;  /* Restored when the port is destroyed. */
    struct netdev_rxq **rxq;    /* Array of netdev_n_rxq(netdev) rx queues. */
    struct ovs_refcount ref_cnt;    /* Reference count. */
    char *type;                 /* Port type as requested by user. */
};

/* A flow in dp_netdev's 'flow_table'.
 *
 *
 * Thread-safety
 * =============
 *
 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 *
 * Motivation
 * ----------
 *
 * The thread safety rules described here for "struct dp_netdev_flow" are
 * motivated by two goals:
 *
 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 *      reading bad data due to changes by some thread concurrently modifying
 *      those members.
 *
 *    - Prevent two threads making changes to members of a given "struct
 *      dp_netdev_flow" from interfering with each other.
 *
 *
 * Rules
 * -----
 *
 * A flow 'flow' may be accessed without a risk of being freed during an RCU
 * grace period.  Code that needs to hold onto a flow for a while
 * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref().
 *
 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
 * flow from being deleted from 'cls' and it doesn't protect members of 'flow'
 * from modification.
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 */
struct dp_netdev_flow {
    /* Set to true by dp_netdev_remove_flow() once the flow has been taken out
     * of the classifier and the flow table. */
    bool dead;
    /* Packet classification. */
    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */

    /* Hash table index by unmasked flow. */
    const struct cmap_node node; /* In owning dp_netdev's 'flow_table'. */
    const struct flow flow;      /* The flow that created this entry. */

    /* Number of references.
     * The classifier owns one reference.
     * Any thread trying to keep a rule from being freed should hold its own
     * reference. */
    struct ovs_refcount ref_cnt;

    /* Statistics.
     *
     * Reading or writing these members requires 'mutex' (the mutex inside
     * each 'struct dp_netdev_flow_stats' bucket). */
    struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */

    /* Actions.  RCU-protected pointer; read it through
     * dp_netdev_flow_get_actions(). */
    OVSRCU_TYPE(struct dp_netdev_actions *) actions;
};

/* Reference counting for 'struct dp_netdev_flow'.  dp_netdev_flow_unref()
 * schedules the flow to be freed after an RCU grace period when the last
 * reference is dropped.  dp_netdev_flow_ref() is defined outside the part of
 * the file shown here; NOTE(review): presumably it returns false when a
 * reference could not be taken -- confirm against its definition. */
static void dp_netdev_flow_unref(struct dp_netdev_flow *);
static bool dp_netdev_flow_ref(struct dp_netdev_flow *);

/* Contained by struct dp_netdev_flow's 'stats' member.
 *
 * One bucket of per-flow counters.  Each bucket carries its own 'mutex',
 * which dp_netdev_flow_free() destroys together with the bucket. */
struct dp_netdev_flow_stats {
    struct ovs_mutex mutex;         /* Guards all the other members. */

    long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
    uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
};

/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' is protected with RCU. */
struct dp_netdev_actions {
    /* These members are immutable: they do not change during the struct's
     * lifetime.  A flow's action set is therefore changed by installing a
     * different 'struct dp_netdev_actions', not by editing this one. */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};

/* Helpers for 'struct dp_netdev_actions'; all three are defined outside the
 * part of the file shown here.
 *
 * NOTE(review): dp_netdev_actions_create() and dp_netdev_flow_get_actions()
 * are declared without 'static' although no header declaration is visible
 * here -- confirm whether external linkage is intended. */
struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
                                                   size_t);
struct dp_netdev_actions *dp_netdev_flow_get_actions(
    const struct dp_netdev_flow *);
static void dp_netdev_actions_free(struct dp_netdev_actions *);

/* PMD: Poll mode drivers.  A PMD accesses devices via polling to eliminate
 * the performance overhead of interrupt processing.  Therefore netdev can
 * not implement rx-wait for these devices.  dpif-netdev needs to poll
 * these devices to check their receive buffers.  A pmd thread polls the
 * devices assigned to it.
 *
 * DPDK uses PMDs for accessing NICs.
 *
 * Note, the instance with cpu core id NON_PMD_CORE_ID is reserved for
 * I/O of all non-pmd threads.  There is no actual thread created
 * for that instance.
 **/
struct dp_netdev_pmd_thread {
    struct dp_netdev *dp;           /* Owning datapath. */
    struct cmap_node node;          /* In 'dp->poll_threads'. */
    /* Per thread exact-match cache.  Note, the instance for cpu core
     * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
     * need to be protected (e.g. by 'dp_netdev_mutex').  All other
     * instances will only be accessed by its own pmd thread.
     *
     * NOTE(review): 'struct dp_netdev' documents 'non_pmd_mutex' as the lock
     * protecting the non-pmd instance; confirm which mutex is meant here. */
    struct emc_cache flow_cache;
    struct latch exit_latch;        /* For terminating the pmd thread. */
    atomic_uint change_seq;         /* For reloading pmd ports. */
    pthread_t thread;
    int index;                      /* Idx of this pmd thread among pmd*/
                                    /* threads on same numa node. */
    int core_id;                    /* CPU core id of this pmd thread. */
    int numa_id;                    /* numa node id of this pmd thread. */
};

/* NOTE(review): presumably the starting value of a pmd thread's 'change_seq';
 * its use is not visible in this part of the file. */
#define PMD_INITIAL_SEQ 1

/* Interface to netdev-based datapath.
 *
 * One of these is allocated per open handle by create_dpif_netdev(); several
 * may share the same 'dp'. */
struct dpif_netdev {
    struct dpif dpif;           /* Base object; see dpif_netdev_cast(). */
    struct dp_netdev *dp;       /* Underlying datapath (holds a reference). */
    uint64_t last_port_seq;     /* 'dp->port_seq' value read at open time. */
};

/* Forward declarations of file-local functions that are used before they are
 * defined.  OVS_REQUIRES() names the mutex the caller must already hold. */
static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
                              struct dp_netdev_port **portp);
static int get_port_by_name(struct dp_netdev *dp, const char *devname,
                            struct dp_netdev_port **portp);
static void dp_netdev_free(struct dp_netdev *)
    OVS_REQUIRES(dp_netdev_mutex);
static void dp_netdev_flow_flush(struct dp_netdev *);
static int do_add_port(struct dp_netdev *dp, const char *devname,
                       const char *type, odp_port_t port_no)
    OVS_REQUIRES(dp->port_mutex);
static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
    OVS_REQUIRES(dp->port_mutex);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
                            bool create, struct dpif **);
static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                                      struct dpif_packet **, int c,
                                      bool may_steal, struct pkt_metadata *,
                                      const struct nlattr *actions,
                                      size_t actions_len);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
                            struct dpif_packet **, int cnt,
                            struct pkt_metadata *);
static void dp_netdev_disable_upcall(struct dp_netdev *);
static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
                                    struct dp_netdev *dp, int index,
                                    int core_id, int numa_id);
static struct dp_netdev_pmd_thread *dp_netdev_get_nonpmd(struct dp_netdev *dp);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);

/* Defined outside the part of the file shown here; used by
 * emc_cache_uninit(). */
static void emc_clear_entry(struct emc_entry *ce);

static void
emc_cache_init(struct emc_cache *flow_cache)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
        flow_cache->entries[i].flow = NULL;
        flow_cache->entries[i].hash = 0;
        flow_cache->entries[i].mf_len = 0;
        miniflow_initialize(&flow_cache->entries[i].mf.flow,
                            flow_cache->entries[i].mf.buf);
    }
}

static void
emc_cache_uninit(struct emc_cache *flow_cache)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
        emc_clear_entry(&flow_cache->entries[i]);
    }
}

static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
    return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
}

static struct dp_netdev *
get_dp_netdev(const struct dpif *dpif)
{
    return dpif_netdev_cast(dpif)->dp;
}

/* Adds to 'all_dps' the name of every datapath whose class is 'dpif_class'.
 * Always returns 0. */
static int
dpif_netdev_enumerate(struct sset *all_dps,
                      const struct dpif_class *dpif_class)
{
    struct shash_node *entry;

    ovs_mutex_lock(&dp_netdev_mutex);
    SHASH_FOR_EACH(entry, &dp_netdevs) {
        struct dp_netdev *candidate = entry->data;

        /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs, so only
         * report the ones of the requested class. */
        if (candidate->class == dpif_class) {
            sset_add(all_dps, entry->name);
        }
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return 0;
}

/* Returns true for every class other than 'dpif_netdev_class'. */
static bool
dpif_netdev_class_is_dummy(const struct dpif_class *class)
{
    bool is_real_netdev = (class == &dpif_netdev_class);

    return !is_real_netdev;
}

/* Maps the port 'type' requested by the user to the netdev type to open.
 * Every type except "internal" is returned unchanged; "internal" becomes
 * "dummy" for a dummy class and "tap" otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }
    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}

/* Allocates a new dpif handle for 'dp', taking a reference on 'dp'.  The
 * netflow engine type and id passed to dpif_init() come from a hash of the
 * datapath name.  Returns the embedded 'struct dpif'. */
static struct dpif *
create_dpif_netdev(struct dp_netdev *dp)
{
    struct dpif_netdev *handle;
    uint16_t name_hash = hash_string(dp->name, 0);

    ovs_refcount_ref(&dp->ref_cnt);

    handle = xmalloc(sizeof *handle);
    handle->dp = dp;
    handle->last_port_seq = seq_read(dp->port_seq);
    dpif_init(&handle->dpif, dp->class, dp->name, name_hash >> 8, name_hash);

    return &handle->dpif;
}

/* Picks a port number for a new port called 'name' in 'dp'.
 *
 * Returns an unused, non-zero port number on success and ODPP_NONE when every
 * number from 1 to UINT16_MAX is already taken.
 *
 * For classes other than 'dpif_netdev_class', the first number embedded in
 * 'name' is tried first (offset by 100 when 'name' begins with "br"), which
 * keeps port numbers predictable in unit tests. */
static odp_port_t
choose_port(struct dp_netdev *dp, const char *name)
    OVS_REQUIRES(dp->port_mutex)
{
    uint32_t port_no;

    if (dp->class != &dpif_netdev_class) {
        /* Names beginning with "br" start the number search at 100. */
        int base_no = strncmp(name, "br", 2) ? 0 : 100;
        const char *digits = name;

        /* Find the first decimal digit in the name, if any. */
        while (*digits != '\0' && !isdigit((unsigned char) *digits)) {
            digits++;
        }

        if (*digits != '\0') {
            port_no = base_no + strtol(digits, NULL, 10);
            if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
                && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
                return u32_to_odp(port_no);
            }
        }
    }

    /* Fall back to the lowest free port number. */
    port_no = 1;
    while (port_no <= UINT16_MAX) {
        if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
            return u32_to_odp(port_no);
        }
        port_no++;
    }

    return ODPP_NONE;
}

/* Creates a new datapath called 'name' of the given 'class', registers it in
 * 'dp_netdevs' and adds its local port (an "internal" port named 'name' with
 * port number ODPP_LOCAL).
 *
 * On success stores the datapath in '*dpp' and returns 0.  If the local port
 * cannot be added, the datapath is torn down again with dp_netdev_free() and
 * the error from do_add_port() is returned; '*dpp' is not written.
 *
 * The caller must hold 'dp_netdev_mutex'. */
static int
create_dp_netdev(const char *name, const struct dpif_class *class,
                 struct dp_netdev **dpp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev *dp;
    struct dp_netdev_pmd_thread *non_pmd;
    int error;

    dp = xzalloc(sizeof *dp);
    shash_add(&dp_netdevs, name, dp);

    /* 'class' and 'name' are const members: write them once through a cast. */
    *CONST_CAST(const struct dpif_class **, &dp->class) = class;
    *CONST_CAST(const char **, &dp->name) = xstrdup(name);
    ovs_refcount_init(&dp->ref_cnt);
    atomic_flag_clear(&dp->destroyed);

    ovs_mutex_init(&dp->flow_mutex);
    classifier_init(&dp->cls, NULL);
    cmap_init(&dp->flow_table);

    ovsthread_stats_init(&dp->stats);

    ovs_mutex_init(&dp->port_mutex);
    cmap_init(&dp->ports);
    dp->port_seq = seq_create();
    fat_rwlock_init(&dp->upcall_rwlock);

    /* Disable upcalls by default. */
    dp_netdev_disable_upcall(dp);
    dp->upcall_aux = NULL;
    dp->upcall_cb = NULL;

    cmap_init(&dp->poll_threads);
    ovs_mutex_init_recursive(&dp->non_pmd_mutex);
    ovsthread_key_create(&dp->per_pmd_key, NULL);

    /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */
    ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID);
    /* The non-pmd instance is configured like a pmd thread, with index 0 and
     * no particular numa node. */
    non_pmd = xzalloc(sizeof *non_pmd);
    dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
                            OVS_NUMA_UNSPEC);

    ovs_mutex_lock(&dp->port_mutex);
    error = do_add_port(dp, name, "internal", ODPP_LOCAL);
    ovs_mutex_unlock(&dp->port_mutex);
    if (error) {
        dp_netdev_free(dp);
        return error;
    }

    *dpp = dp;
    return 0;
}

static int
dpif_netdev_open(const struct dpif_class *class, const char *name,
                 bool create, struct dpif **dpifp)
{
    struct dp_netdev *dp;
    int error;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, name);
    if (!dp) {
        error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
    } else {
        error = (dp->class != class ? EINVAL
                 : create ? EEXIST
                 : 0);
    }
    if (!error) {
        *dpifp = create_dpif_netdev(dp);
        dp->dpif = *dpifp;
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return error;
}

/* Tears down 'dp' and frees it: unregisters it from 'dp_netdevs', destroys
 * its pmd instances, flushes its flows, deletes its ports and releases its
 * statistics, tables, locks and name.
 *
 * Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 * through the 'dp_netdevs' shash while freeing 'dp'. */
static void
dp_netdev_free(struct dp_netdev *dp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev_port *port;
    struct dp_netdev_stats *bucket;
    int i;

    shash_find_and_delete(&dp_netdevs, dp->name);

    dp_netdev_destroy_all_pmds(dp);
    ovs_mutex_destroy(&dp->non_pmd_mutex);
    ovsthread_key_delete(dp->per_pmd_key);

    dp_netdev_flow_flush(dp);
    ovs_mutex_lock(&dp->port_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        do_del_port(dp, port);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    /* Each stats bucket owns a mutex that must go before the bucket does. */
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
        ovs_mutex_destroy(&bucket->mutex);
        free_cacheline(bucket);
    }
    ovsthread_stats_destroy(&dp->stats);

    classifier_destroy(&dp->cls);
    cmap_destroy(&dp->flow_table);
    ovs_mutex_destroy(&dp->flow_mutex);
    seq_destroy(dp->port_seq);
    cmap_destroy(&dp->ports);
    fat_rwlock_destroy(&dp->upcall_rwlock);

    /* 'name' is a const member holding a copy made with xstrdup(). */
    free(CONST_CAST(char *, dp->name));
    free(dp);
}

/* Drops one reference to 'dp', freeing it when that was the last one.  A
 * null 'dp' is ignored. */
static void
dp_netdev_unref(struct dp_netdev *dp)
{
    bool was_last;

    if (!dp) {
        return;
    }

    /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
     * get a new reference to 'dp' through the 'dp_netdevs' shash. */
    ovs_mutex_lock(&dp_netdev_mutex);
    was_last = ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1;
    if (was_last) {
        dp_netdev_free(dp);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}

/* Closes the handle 'dpif': releases its reference on the datapath and frees
 * the handle itself. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    dp_netdev_unref(get_dp_netdev(dpif));
    free(dpif);
}

/* Marks the datapath behind 'dpif' as destroyed.  The first call drops one
 * reference on the datapath; later calls do nothing.  Always returns 0. */
static int
dpif_netdev_destroy(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    /* Only the caller that flips 'destroyed' releases the reference. */
    if (!atomic_flag_test_and_set(&dp->destroyed)
        && ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) {
        /* Can't happen: 'dpif' still owns a reference to 'dp'. */
        OVS_NOT_REACHED();
    }

    return 0;
}

/* Fills 'stats' for the datapath behind 'dpif': the number of flows, and the
 * hit, miss and lost counters summed over all statistics buckets.  The mask
 * fields are set to their all-ones maximum.  Always returns 0. */
static int
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_stats *cur;
    size_t idx;

    stats->n_masks = UINT32_MAX;
    stats->n_mask_hit = UINT64_MAX;
    stats->n_flows = cmap_count(&dp->flow_table);

    stats->n_lost = 0;
    stats->n_missed = 0;
    stats->n_hit = 0;
    OVSTHREAD_STATS_FOR_EACH_BUCKET (cur, idx, &dp->stats) {
        /* Each bucket's counters are guarded by the bucket's own mutex. */
        ovs_mutex_lock(&cur->mutex);
        stats->n_hit += cur->n[DP_STAT_HIT];
        stats->n_missed += cur->n[DP_STAT_MISS];
        stats->n_lost += cur->n[DP_STAT_LOST];
        ovs_mutex_unlock(&cur->mutex);
    }

    return 0;
}

/* Bumps 'pmd->change_seq' by one.  The previous value is not needed. */
static void
dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
{
    int previous;

    atomic_add_relaxed(&pmd->change_seq, 1, &previous);
}

/* Causes every pmd thread of 'dp' to reload its tx/rx devices.
 * Must be called after adding/removing ports. */
static void
dp_netdev_reload_pmds(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread *thread;

    CMAP_FOR_EACH (thread, node, &dp->poll_threads) {
        dp_netdev_reload_pmd__(thread);
    }
}

/* Returns the hash under which 'port_no' is stored in a datapath's 'ports'
 * map. */
static uint32_t
hash_port_no(odp_port_t port_no)
{
    uint32_t raw_no = odp_to_u32(port_no);

    return hash_int(raw_no, 0);
}

/* Opens the network device 'devname' and adds it to 'dp' as port 'port_no'.
 * 'type' is the port type requested by the user; the netdev type actually
 * opened is derived from it with dpif_netdev_port_open_type().
 *
 * Loopback devices are rejected with EINVAL.  For a pmd netdev the device is
 * first configured for multiqueue operation; once the port is ready,
 * dp_netdev_set_pmds_on_numa() is called for its numa node and all pmd
 * threads are asked to reload.
 *
 * Returns 0 on success, otherwise a positive errno value.  On failure
 * everything acquired here (the netdev, any rx queues already opened and the
 * port itself) is released again.
 *
 * The caller must hold 'dp->port_mutex'. */
static int
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
            odp_port_t port_no)
    OVS_REQUIRES(dp->port_mutex)
{
    struct netdev_saved_flags *sf;
    struct dp_netdev_port *port;
    struct netdev *netdev;
    enum netdev_flags flags;
    const char *open_type;
    int n_rxq_opened = 0;
    int error;
    int i;

    /* XXX reject devices already in some dp_netdev. */

    /* Open and validate network device. */
    open_type = dpif_netdev_port_open_type(dp->class, type);
    error = netdev_open(devname, open_type, &netdev);
    if (error) {
        return error;
    }
    /* XXX reject non-Ethernet devices */

    netdev_get_flags(netdev, &flags);
    if (flags & NETDEV_LOOPBACK) {
        VLOG_ERR("%s: cannot add a loopback device", devname);
        netdev_close(netdev);
        return EINVAL;
    }

    if (netdev_is_pmd(netdev)) {
        int n_cores = ovs_numa_get_n_cores();

        if (n_cores == OVS_CORE_UNSPEC) {
            VLOG_ERR("%s, cannot get cpu core info", devname);
            netdev_close(netdev);
            return ENOENT;
        }
        /* There can only be ovs_numa_get_n_cores() pmd threads,
         * so creates a tx_q for each. */
        error = netdev_set_multiq(netdev, n_cores, NR_QUEUE);
        if (error) {
            VLOG_ERR("%s, cannot set multiq", devname);
            netdev_close(netdev);
            /* Report the failure of the call itself, not whatever 'errno'
             * happens to hold. */
            return error;
        }
    }

    port = xzalloc(sizeof *port);
    port->port_no = port_no;
    port->netdev = netdev;
    port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
    port->type = xstrdup(type);
    for (i = 0; i < netdev_n_rxq(netdev); i++) {
        error = netdev_rxq_open(netdev, &port->rxq[i], i);
        if (error
            && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
            VLOG_ERR("%s: cannot receive packets on this network device (%s)",
                     devname, ovs_strerror(error));
            goto error_destroy_port;
        }
        /* Counts the slots that have been through netdev_rxq_open(),
         * including those where EOPNOTSUPP was tolerated for a dummy class;
         * those are closed on the error path exactly as the original
         * netdev_turn_flags_on() failure path closed every slot. */
        n_rxq_opened++;
    }

    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
    if (error) {
        goto error_destroy_port;
    }
    port->sf = sf;

    if (netdev_is_pmd(netdev)) {
        dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
        dp_netdev_reload_pmds(dp);
    }
    ovs_refcount_init(&port->ref_cnt);

    cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
    seq_change(dp->port_seq);

    return 0;

error_destroy_port:
    /* Undo everything in the same order on every failure path. */
    for (i = 0; i < n_rxq_opened; i++) {
        netdev_rxq_close(port->rxq[i]);
    }
    netdev_close(netdev);
    free(port->type);
    free(port->rxq);
    free(port);
    return error;
}

/* Adds 'netdev' as a port of the datapath behind 'dpif'.
 *
 * If '*port_nop' is ODPP_NONE a port number is chosen automatically (EFBIG
 * if none is free); otherwise the requested number is used (EBUSY if it is
 * already taken).  When a number has been settled on it is stored in
 * '*port_nop' before the port is actually added.
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
                     odp_port_t *port_nop)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *port_name;
    odp_port_t chosen_no;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (*port_nop == ODPP_NONE) {
        chosen_no = choose_port(dp, port_name);
        error = (chosen_no == ODPP_NONE) ? EFBIG : 0;
    } else {
        chosen_no = *port_nop;
        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
    }

    if (error == 0) {
        *port_nop = chosen_no;
        error = do_add_port(dp, port_name, netdev_get_type(netdev),
                            chosen_no);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

/* Deletes port 'port_no' from the datapath behind 'dpif'.
 *
 * Returns 0 on success, EINVAL when asked to delete the local port
 * (ODPP_LOCAL), or the error from get_port_by_number(). */
static int
dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *victim;
    int error = EINVAL;

    ovs_mutex_lock(&dp->port_mutex);
    if (port_no != ODPP_LOCAL) {
        error = get_port_by_number(dp, port_no, &victim);
        if (error == 0) {
            do_del_port(dp, victim);
        }
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

/* Returns true for every port number except ODPP_NONE. */
static bool
is_valid_port_number(odp_port_t port_no)
{
    return !(port_no == ODPP_NONE);
}

/* Returns the port of 'dp' whose number is 'port_no', or NULL if 'dp' has no
 * such port. */
static struct dp_netdev_port *
dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
{
    const uint32_t hash = hash_port_no(port_no);
    struct dp_netdev_port *candidate;

    /* Several port numbers may share a hash, so compare the number too. */
    CMAP_FOR_EACH_WITH_HASH (candidate, node, hash, &dp->ports) {
        if (candidate->port_no == port_no) {
            return candidate;
        }
    }
    return NULL;
}

/* Looks up port 'port_no' in 'dp'.
 *
 * On success stores the port in '*portp' and returns 0.  Otherwise stores
 * NULL in '*portp' and returns EINVAL for an invalid port number or ENOENT
 * when no port has that number. */
static int
get_port_by_number(struct dp_netdev *dp,
                   odp_port_t port_no, struct dp_netdev_port **portp)
{
    if (is_valid_port_number(port_no)) {
        struct dp_netdev_port *found = dp_netdev_lookup_port(dp, port_no);

        *portp = found;
        return found ? 0 : ENOENT;
    }

    *portp = NULL;
    return EINVAL;
}

/* Takes a reference on 'port'.  A null 'port' is ignored. */
static void
port_ref(struct dp_netdev_port *port)
{
    if (!port) {
        return;
    }
    ovs_refcount_ref(&port->ref_cnt);
}

/* Attempts to take a reference on 'port' with ovs_refcount_try_ref_rcu() and
 * returns its result.  Returns false for a null 'port'. */
static bool
port_try_ref(struct dp_netdev_port *port)
{
    if (!port) {
        return false;
    }
    return ovs_refcount_try_ref_rcu(&port->ref_cnt);
}

/* Releases everything owned by 'port' and frees it: the netdev, the saved
 * netdev flags, every rx queue, the queue array and the type string. */
static void
port_destroy__(struct dp_netdev_port *port)
{
    /* Read the queue count before the netdev is closed. */
    const int n_queues = netdev_n_rxq(port->netdev);
    struct netdev_rxq **queue = port->rxq;
    int remaining;

    netdev_close(port->netdev);
    netdev_restore_flags(port->sf);

    for (remaining = n_queues; remaining > 0; remaining--) {
        netdev_rxq_close(*queue++);
    }
    free(port->rxq);
    free(port->type);
    free(port);
}

/* Drops one reference to 'port'.  When that was the last reference, the port
 * is destroyed after an RCU grace period.  A null 'port' is ignored. */
static void
port_unref(struct dp_netdev_port *port)
{
    if (!port) {
        return;
    }
    if (ovs_refcount_unref_relaxed(&port->ref_cnt) == 1) {
        ovsrcu_postpone(port_destroy__, port);
    }
}

/* Searches the ports of 'dp' for the one whose netdev is named 'devname'.
 *
 * On success stores the port in '*portp' and returns 0.  Otherwise stores
 * NULL in '*portp', as get_port_by_number() does on failure, and returns
 * ENOENT, so that a caller never sees an indeterminate pointer.
 *
 * The caller must hold 'dp->port_mutex'. */
static int
get_port_by_name(struct dp_netdev *dp,
                 const char *devname, struct dp_netdev_port **portp)
    OVS_REQUIRES(dp->port_mutex)
{
    struct dp_netdev_port *port;

    CMAP_FOR_EACH (port, node, &dp->ports) {
        if (!strcmp(netdev_get_name(port->netdev), devname)) {
            *portp = port;
            return 0;
        }
    }

    *portp = NULL;
    return ENOENT;
}

/* Returns the number of entries in 'dp->poll_threads' whose numa node id is
 * 'numa_id'. */
static int
get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id)
{
    struct dp_netdev_pmd_thread *thread;
    int count = 0;

    CMAP_FOR_EACH (thread, node, &dp->poll_threads) {
        count += (thread->numa_id == numa_id);
    }

    return count;
}

/* Returns 'true' if 'dp' has at least one port whose netdev is a pmd netdev
 * located on numa node 'numa_id', 'false' otherwise. */
static bool
has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id)
{
    struct dp_netdev_port *candidate;

    CMAP_FOR_EACH (candidate, node, &dp->ports) {
        if (!netdev_is_pmd(candidate->netdev)) {
            continue;
        }
        if (netdev_get_numa_id(candidate->netdev) == numa_id) {
            return true;
        }
    }

    return false;
}


/* Removes 'port' from 'dp' and drops the datapath's reference to it.
 *
 * The port is taken out of 'dp->ports' and 'dp->port_seq' is changed.  For a
 * pmd netdev, the pmd threads of its numa node are deleted when no other pmd
 * port remains on that node, and all pmd threads are asked to reload.
 *
 * The caller must hold 'dp->port_mutex'. */
static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
    OVS_REQUIRES(dp->port_mutex)
{
    /* Remove with the same hash helper that do_add_port() inserts with and
     * dp_netdev_lookup_port() searches with, so the three can never
     * disagree. */
    cmap_remove(&dp->ports, &port->node, hash_port_no(port->port_no));
    seq_change(dp->port_seq);
    if (netdev_is_pmd(port->netdev)) {
        int numa_id = netdev_get_numa_id(port->netdev);

        /* If there is no netdev on the numa node, deletes the pmd threads
         * for that numa.  In either case, reloads the queues.  */
        if (!has_pmd_port_for_numa(dp, numa_id)) {
            dp_netdev_del_pmds_on_numa(dp, numa_id);
        }
        dp_netdev_reload_pmds(dp);
    }

    port_unref(port);
}

/* Fills 'dpif_port' from 'port'.  The name and type are fresh copies made
 * with xstrdup(). */
static void
answer_port_query(const struct dp_netdev_port *port,
                  struct dpif_port *dpif_port)
{
    const char *netdev_name = netdev_get_name(port->netdev);

    dpif_port->name = xstrdup(netdev_name);
    dpif_port->type = xstrdup(port->type);
    dpif_port->port_no = port->port_no;
}

/* Looks up port 'port_no' in the datapath behind 'dpif'.
 *
 * Returns 0 if the port exists, filling in 'dpif_port' when it is nonnull;
 * otherwise returns the error from get_port_by_number(). */
static int
dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
                                 struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *found;
    int error = get_port_by_number(dp, port_no, &found);

    if (error) {
        return error;
    }
    if (dpif_port) {
        answer_port_query(found, dpif_port);
    }
    return 0;
}

static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    error = get_port_by_name(dp, devname, &port);
    if (!error && dpif_port) {
        answer_port_query(port, dpif_port);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

/* Frees 'flow' together with its statistics buckets, classifier rule and
 * actions. */
static void
dp_netdev_flow_free(struct dp_netdev_flow *flow)
{
    struct dp_netdev_flow_stats *stats_bucket;
    struct dp_netdev_actions *actions;
    size_t idx;

    /* Each bucket owns a mutex that must be destroyed before the bucket. */
    OVSTHREAD_STATS_FOR_EACH_BUCKET (stats_bucket, idx, &flow->stats) {
        ovs_mutex_destroy(&stats_bucket->mutex);
        free_cacheline(stats_bucket);
    }
    ovsthread_stats_destroy(&flow->stats);

    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));

    actions = dp_netdev_flow_get_actions(flow);
    dp_netdev_actions_free(actions);

    free(flow);
}

/* Drops one reference to 'flow'.
 *
 * When ovs_refcount_unref_relaxed() reports 1 (presumably the count before
 * the decrement, i.e. this was the last reference), schedules
 * dp_netdev_flow_free() on 'flow' through ovsrcu_postpone(). */
static void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
{
    if (ovs_refcount_unref_relaxed(&flow->ref_cnt) != 1) {
        return;
    }

    ovsrcu_postpone(dp_netdev_flow_free, flow);
}

/* Unlinks 'flow' from 'dp': removes its rule from 'dp->cls' and its node
 * from 'dp->flow_table', marks it dead, and drops one reference.
 *
 * The caller must hold 'dp->flow_mutex'. */
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQUIRES(dp->flow_mutex)
{
    classifier_remove(&dp->cls, CONST_CAST(struct cls_rule *, &flow->cr));
    cmap_remove(&dp->flow_table,
                CONST_CAST(struct cmap_node *, &flow->node),
                flow_hash(&flow->flow, 0));

    /* emc_entry_alive() checks this flag before using a cached flow. */
    flow->dead = true;

    dp_netdev_flow_unref(flow);
}

/* Removes every flow in 'dp->flow_table', holding 'dp->flow_mutex' for the
 * whole walk. */
static void
dp_netdev_flow_flush(struct dp_netdev *dp)
{
    struct dp_netdev_flow *doomed;

    ovs_mutex_lock(&dp->flow_mutex);
    CMAP_FOR_EACH (doomed, node, &dp->flow_table) {
        dp_netdev_remove_flow(dp, doomed);
    }
    ovs_mutex_unlock(&dp->flow_mutex);
}

/* dpif entry point that flushes all flows from the datapath behind 'dpif'.
 * Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));

    return 0;
}

/* State of one port dump.  Allocated by dpif_netdev_port_dump_start(),
 * advanced by dpif_netdev_port_dump_next(), and released by
 * dpif_netdev_port_dump_done(). */
struct dp_netdev_port_state {
    /* Cursor handed to cmap_next_position() when walking 'dp->ports'. */
    struct cmap_position position;
    /* xstrdup()'d name of the port most recently returned by
     * dpif_netdev_port_dump_next(); freed and replaced on each call. */
    char *name;
};

/* Begins a port dump by storing a newly xzalloc()'d
 * 'struct dp_netdev_port_state' in '*statep'.  Always returns 0. */
static int
dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    struct dp_netdev_port_state *state = xzalloc(sizeof *state);

    *statep = state;
    return 0;
}

/* Advances the port dump described by 'state_' and stores the next port in
 * '*dpif_port'.
 *
 * Returns 0 if a port was stored, EOF when cmap_next_position() yields no
 * further node.
 *
 * 'dpif_port->name' points at a copy owned by the dump state, which is
 * replaced on the next call and freed by dpif_netdev_port_dump_done();
 * 'dpif_port->type' points directly at the port's own 'type' string. */
static int
dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
                           struct dpif_port *dpif_port)
{
    struct dp_netdev_port_state *state = state_;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    struct cmap_node *node;

    node = cmap_next_position(&dp->ports, &state->position);
    if (!node) {
        return EOF;
    }

    port = CONTAINER_OF(node, struct dp_netdev_port, node);

    /* Swap the previously returned name for this port's name. */
    free(state->name);
    state->name = xstrdup(netdev_get_name(port->netdev));

    dpif_port->name = state->name;
    dpif_port->type = port->type;
    dpif_port->port_no = port->port_no;

    return 0;
}

/* Finishes a port dump: frees the last name handed out and then the dump
 * state 'state_' itself.  Always returns 0. */
static int
dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dp_netdev_port_state *dump_state = state_;

    free(dump_state->name);
    free(dump_state);

    return 0;
}

/* Reports whether the datapath's port sequence number has changed since
 * the previous call on this 'dpif_'.
 *
 * Returns ENOBUFS (after recording the new sequence number) if it changed,
 * EAGAIN otherwise.  'devnamep' is never written. */
static int
dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
    uint64_t current_seq = seq_read(dpif->dp->port_seq);

    if (dpif->last_port_seq == current_seq) {
        return EAGAIN;
    }

    dpif->last_port_seq = current_seq;
    return ENOBUFS;
}

/* Calls seq_wait() on the datapath's port sequence, using the sequence
 * number last recorded by dpif_netdev_port_poll() for 'dpif_'. */
static void
dpif_netdev_port_poll_wait(const struct dpif *dpif_)
{
    struct dpif_netdev *netdev_dpif = dpif_netdev_cast(dpif_);

    seq_wait(netdev_dpif->dp->port_seq, netdev_dpif->last_port_seq);
}

/* Converts 'cr', the 'cr' member embedded in a dp_netdev_flow, into a
 * pointer to the enclosing flow.  A null 'cr' yields NULL. */
static struct dp_netdev_flow *
dp_netdev_flow_cast(const struct cls_rule *cr)
{
    if (!cr) {
        return NULL;
    }

    return CONTAINER_OF(cr, struct dp_netdev_flow, cr);
}

/* Attempts to take a reference on 'flow' and returns the result of
 * ovs_refcount_try_ref_rcu() on its reference count. */
static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow)
{
    bool acquired = ovs_refcount_try_ref_rcu(&flow->ref_cnt);

    return acquired;
}

/* netdev_flow_key utilities.
 *
 * netdev_flow_key is basically a miniflow.  We use these functions
 * (netdev_flow_key_clone, netdev_flow_key_equal, ...) instead of the miniflow
 * functions (miniflow_clone_inline, miniflow_equal, ...), because:
 *
 * - Since we are dealing exclusively with miniflows created by
 *   miniflow_extract(), if the map is different the miniflow is different.
 *   Therefore we can be faster by comparing the map and the miniflow in a
 *   single memcmp().
 * - netdev_flow_key's miniflow always has inline values.
 * - These functions can be inlined by the compiler.
 *
 * The following assertions make sure that what we're doing with miniflow is
 * safe.
 */
/* Everything in a miniflow that precedes 'inline_values' occupies exactly
 * sizeof(uint64_t) bytes; netdev_flow_key_size() adds that offset to the size
 * of the values. */
BUILD_ASSERT_DECL(offsetof(struct miniflow, inline_values)
                  == sizeof(uint64_t));
/* 'flow' is the first member of a netdev_flow_key, which is what lets
 * miniflow_to_netdev_flow_key() cast one pointer type to the other. */
BUILD_ASSERT_DECL(offsetof(struct netdev_flow_key, flow) == 0);

/* Reinterprets 'mf' as a pointer to a netdev_flow_key, dropping the const
 * qualifier.  No data is copied. */
static inline struct netdev_flow_key *
miniflow_to_netdev_flow_key(const struct miniflow *mf)
{
    struct miniflow *writable = CONST_CAST(struct miniflow *, mf);

    return (struct netdev_flow_key *) writable;
}

/* Returns the size in bytes of a netdev_flow_key whose miniflow map has
 * 'flow_u32s' bits set: the part of the miniflow that precedes
 * 'inline_values' plus MINIFLOW_VALUES_SIZE('flow_u32s'). */
static inline uint32_t
netdev_flow_key_size(uint32_t flow_u32s)
{
    size_t prefix_len = offsetof(struct miniflow, inline_values);
    size_t values_len = MINIFLOW_VALUES_SIZE(flow_u32s);

    return values_len + prefix_len;
}

/* Compares two netdev_flow_keys (miniflows) for the exact match cache.
 *
 * Returns true if the first 'size' bytes of 'a' and 'b' are identical,
 * false otherwise. */
static inline bool
netdev_flow_key_equal(const struct netdev_flow_key *a,
                      const struct netdev_flow_key *b,
                      uint32_t size)
{
    return memcmp(a, b, size) == 0;
}

/* Copies the first 'size' bytes of the key at 'src' into 'dst' with
 * memcpy(). */
static inline void
netdev_flow_key_clone(struct netdev_flow_key *dst,
                      const struct netdev_flow_key *src,
                      uint32_t size)
{
    const void *from = src;
    void *to = dst;

    memcpy(to, from, size);
}

/* Returns true if cache entry 'ce' refers to a flow and that flow has not
 * been marked dead. */
static inline bool
emc_entry_alive(struct emc_entry *ce)
{
    if (!ce->flow) {
        return false;
    }

    return !ce->flow->dead;
}

/* Empties cache entry 'ce': if it refers to a flow, drops the reference
 * and resets the pointer to NULL.  Does nothing for an empty entry. */
static void
emc_clear_entry(struct emc_entry *ce)
{
    if (!ce->flow) {
        return;
    }

    dp_netdev_flow_unref(ce->flow);
    ce->flow = NULL;
}

/* Points cache entry 'ce' at 'flow'.
 *
 * If 'ce' currently refers to a different flow, that reference is dropped
 * and a new one is attempted on 'flow'; should dp_netdev_flow_ref() fail,
 * the entry is left with a null flow.
 *
 * If 'mf' is nonnull, the entry's key, key length and hash are also
 * overwritten from 'mf' and 'hash'.  With a null 'mf' they are left alone
 * and 'hash' is ignored. */
static inline void
emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
                 const struct netdev_flow_key *mf, uint32_t hash)
{
    if (ce->flow != flow) {
        if (ce->flow) {
            dp_netdev_flow_unref(ce->flow);
        }

        ce->flow = dp_netdev_flow_ref(flow) ? flow : NULL;
    }

    if (mf) {
        uint32_t key_len = netdev_flow_key_size(count_1bits(mf->flow.map));

        netdev_flow_key_clone(&ce->mf, mf, key_len);
        ce->hash = hash;
        ce->mf_len = key_len;
    }
}

/* Associates the key 'mf' (whose hash is 'hash') with 'flow' in 'cache'.
 *
 * If one of the candidate positions for 'hash' already holds this key, only
 * its flow pointer is updated.  Otherwise one candidate is chosen as the
 * victim and overwritten with the new key, hash and flow. */
static inline void
emc_insert(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash,
           struct dp_netdev_flow *flow)
{
    struct netdev_flow_key *key = miniflow_to_netdev_flow_key(mf);
    struct emc_entry *victim = NULL;
    struct emc_entry *entry;

    EMC_FOR_EACH_POS_WITH_HASH(cache, entry, hash) {
        if (entry->hash == hash
            && netdev_flow_key_equal(&entry->mf, key, entry->mf_len)) {
            /* The key is already cached: just retarget the entry. */
            emc_change_entry(entry, flow, NULL, 0);
            return;
        }

        /* Victim selection: take the first candidate; prefer a candidate
         * that is not alive over a victim that is; otherwise prefer the
         * candidate with the smaller stored hash. */
        if (!victim
            || (emc_entry_alive(victim) && !emc_entry_alive(entry))
            || entry->hash < victim->hash) {
            victim = entry;
        }
    }

    /* The key was not found; store it in the chosen victim. */
    emc_change_entry(victim, flow, key, hash);
}

/* Searches 'cache' for the key 'mf' with hash 'hash'.
 *
 * Returns the flow of the first candidate entry whose stored hash matches,
 * that is alive, and whose stored key equals 'mf' over the entry's key
 * length.  Returns NULL if no candidate qualifies. */
static inline struct dp_netdev_flow *
emc_lookup(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash)
{
    const struct netdev_flow_key *wanted = miniflow_to_netdev_flow_key(mf);
    struct emc_entry *entry;

    EMC_FOR_EACH_POS_WITH_HASH(cache, entry, hash) {
        if (entry->hash == hash && emc_entry_alive(entry)) {
            if (netdev_flow_key_equal(&entry->mf, wanted, entry->mf_len)) {
                return entry->flow;
            }
        }
    }

    return NULL;
}

/* Looks up 'key' in the classifier 'dp->cls' (as a batch of one) and returns
 * the flow that owns the matching rule, or NULL if the lookup produced no
 * rule. */
static struct dp_netdev_flow *
dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key)
{
    struct cls_rule *match;

    classifier_lookup_miniflow_batch(&dp->cls, &key, &match, 1);

    return dp_netdev_flow_cast(match);
}

/* Searches 'dp->flow_table' for a flow whose key is exactly 'flow'
 * (per flow_equal()), using flow_hash('flow', 0) to select the bucket.
 * Returns the flow, or NULL if none matches. */
static struct dp_netdev_flow *
dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
{
    struct dp_netdev_flow *candidate;

    CMAP_FOR_EACH_WITH_HASH (candidate, node, flow_hash(flow, 0),
                             &dp->flow_table) {
        if (!flow_equal(&candidate->flow, flow)) {
            /* Same hash, different key: keep looking. */
        } else {
            return candidate;
        }
    }

    return NULL;
}

/* Aggregates the per-thread stats buckets of 'netdev_flow' into '*stats'.
 *
 * '*stats' is zeroed first.  Packet and byte counts are summed, 'used' is
 * the maximum over all buckets, and TCP flags are OR'd together.  Each
 * bucket is read while holding that bucket's mutex. */
static void
get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
                    struct dpif_flow_stats *stats)
{
    struct dp_netdev_flow_stats *b;
    size_t idx;

    memset(stats, 0, sizeof *stats);

    OVSTHREAD_STATS_FOR_EACH_BUCKET (b, idx, &netdev_flow->stats) {
        ovs_mutex_lock(&b->mutex);
        stats->n_packets += b->packet_count;
        stats->n_bytes += b->byte_count;
        if (b->used > stats->used) {
            stats->used = b->used;
        }
        stats->tcp_flags |= b->tcp_flags;
        ovs_mutex_unlock(&b->mutex);
    }
}

static void
dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
                            struct ofpbuf *buffer, struct dpif_flow *flow)
{
    struct flow_wildcards wc;
    struct dp_netdev_actions *actions;

    minimask_expand(&netdev_flow->cr.match.mask, &wc);
    odp_flow_key_from_mask(buffer, &wc.masks, &netdev_flow->flow,
                           odp_to_u32(wc.masks.in_port.odp_port),
                           SIZE_MAX, true);
    flow->mask = ofpbuf_data(buffer);
    flow->mask_len = ofpbuf_size(buffer);

    actions = dp_netdev_flow_get_actions(netdev_flow);
    flow->actions = actions->actions;
    flow->actions_len = actions->size;

    get_dpif_flow_stats(netdev_flow, &flow->stats);
}

static int
dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              const struct nlattr *mask_key,
                              uint32_t mask_key_len, const struct flow *flow,
                              struct flow *mask)
{
    if (mask_key_len) {
        enum odp_key_fitness fitness;

        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
        if (fitness) {
            /* This should not happen: it indicates that
             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
             * disagree on the acceptable form of a mask.  Log the problem
             * as an error, with enough details to enable debugging. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

            if (!VLOG_DROP_ERR(&rl)) {
                struct ds s;

                ds_init(&s);
                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
                                true);
                VLOG_ERR("internal error parsing flow mask %s (%s)",
                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
                ds_destroy(&s);
            }

            return EINVAL;
        }
    } else {
        enum mf_field_id id;
        /* No mask key, unwildcard everything except fields whose
         * prerequisities are not met. */
        memset(mask, 0x0, sizeof *mask);

        for (id = 0; id < MFF_N_IDS; ++id) {
            /* Skip registers and metadata. */
            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
                && id != MFF_METADATA) {
                const struct mf_field *mf = mf_from_id(id);
                if (mf_are_prereqs_ok(mf, flow)) {
                    mf_mask_field(mf, mask);
                }
            }
        }
    }

    /* Force unwildcard the in_port.
     *
     * We need to do this even in the case where we unwildcard "everything"
     * above because "everything" only includes the 16-bit OpenFlow port number
     * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
     * port number mask->in_port.odp_port. */
    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);

    return 0;
}

/* Parses the netlink attributes 'key' ('key_len' bytes) into '*flow'.
 *
 * Returns 0 on success.  Returns EINVAL if odp_flow_key_to_flow() reports a
 * problem (which is also logged, rate-limited), or if the resulting datapath
 * in_port is neither a valid port number nor ODPP_NONE. */
static int
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              struct flow *flow)
{
    odp_port_t in_port;

    if (odp_flow_key_to_flow(key, key_len, flow)) {
        /* Not expected to happen: it means odp_flow_key_from_flow() and
         * odp_flow_key_to_flow() disagree about what a valid flow key looks
         * like.  Log enough detail to debug it. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    in_port = flow->in_port.odp_port;
    if (is_valid_port_number(in_port) || in_port == ODPP_NONE) {
        return 0;
    }

    return EINVAL;
}

/* Looks up the flow whose key is given by 'get->key' and describes it in
 * 'get->flow', using 'get->buffer' for storage.
 *
 * Returns 0 on success, the parse error if the key cannot be converted to a
 * flow, or ENOENT if no flow with exactly that key exists. */
static int
dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *found;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(get->key, get->key_len, &key);
    if (error) {
        return error;
    }

    found = dp_netdev_find_flow(dp, &key);
    if (!found) {
        return ENOENT;
    }

    dp_netdev_flow_to_dpif_flow(found, get->buffer, get->flow);
    return 0;
}

/* Creates a new flow for 'match' with a copy of the 'actions_len' bytes of
 * 'actions', and inserts it into both 'dp->flow_table' and 'dp->cls'.
 *
 * The caller must hold 'dp->flow_mutex'.  Always returns 0. */
static int
dp_netdev_flow_add(struct dp_netdev *dp, struct match *match,
                   const struct nlattr *actions, size_t actions_len)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct dp_netdev_flow *netdev_flow;

    netdev_flow = xzalloc(sizeof *netdev_flow);
    /* 'flow' is a const member, hence the cast to initialize it. */
    *CONST_CAST(struct flow *, &netdev_flow->flow) = match->flow;

    ovs_refcount_init(&netdev_flow->ref_cnt);

    ovsthread_stats_init(&netdev_flow->stats);

    ovsrcu_set(&netdev_flow->actions,
               dp_netdev_actions_create(actions, actions_len));

    /* The flow is fully initialized above, before it is made reachable
     * through the flow table and the classifier below. */
    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
                  match, NETDEV_RULE_PRIORITY);
    cmap_insert(&dp->flow_table,
                CONST_CAST(struct cmap_node *, &netdev_flow->node),
                flow_hash(&match->flow, 0));
    classifier_insert(&dp->cls,
                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));

    /* Debug-only, rate-limited dump of the flow that was just added.  Note
     * that the match is formatted with OFP_DEFAULT_PRIORITY, not with the
     * NETDEV_RULE_PRIORITY the rule was created with. */
    if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
        struct ds ds = DS_EMPTY_INITIALIZER;

        ds_put_cstr(&ds, "flow_add: ");
        match_format(match, &ds, OFP_DEFAULT_PRIORITY);
        ds_put_cstr(&ds, ", actions:");
        format_odp_actions(&ds, actions, actions_len);

        VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));

        ds_destroy(&ds);
    }

    return 0;
}

/* Resets every per-thread stats bucket of 'netdev_flow' to zero, taking
 * each bucket's mutex while it is being cleared. */
static void
clear_stats(struct dp_netdev_flow *netdev_flow)
{
    struct dp_netdev_flow_stats *b;
    size_t idx;

    OVSTHREAD_STATS_FOR_EACH_BUCKET (b, idx, &netdev_flow->stats) {
        ovs_mutex_lock(&b->mutex);
        b->packet_count = 0;
        b->byte_count = 0;
        b->tcp_flags = 0;
        b->used = 0;
        ovs_mutex_unlock(&b->mutex);
    }
}

/* Creates or modifies a flow according to 'put'.
 *
 * The key and mask are parsed from 'put->key' and 'put->mask'; a parse
 * failure is returned as-is.  The classifier is then consulted, under
 * 'dp->flow_mutex', for a flow matching the parsed key:
 *
 *   - No matching flow: with DPIF_FP_CREATE the flow is added (EFBIG if the
 *     flow table already holds MAX_FLOWS flows) and 'put->stats', if
 *     nonnull, is zeroed; without DPIF_FP_CREATE the result is ENOENT.
 *
 *   - Matching flow with an identical key and DPIF_FP_MODIFY set: its
 *     actions are replaced, 'put->stats' (if nonnull) receives the stats as
 *     they were before any clearing, and DPIF_FP_ZERO_STATS then clears
 *     them.
 *
 *   - Matching flow otherwise: EEXIST if DPIF_FP_CREATE is set, else EINVAL
 *     (overlapping flow).
 *
 * Returns 0 on success or one of the errors above. */
static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct miniflow miniflow;
    struct match match;
    int error;

    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
    if (error) {
        return error;
    }
    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
                                          put->mask, put->mask_len,
                                          &match.flow, &match.wc.masks);
    if (error) {
        return error;
    }
    /* 'miniflow' is only needed for the classifier lookup; it is destroyed
     * just before returning. */
    miniflow_init(&miniflow, &match.flow);

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
    if (!netdev_flow) {
        if (put->flags & DPIF_FP_CREATE) {
            if (cmap_count(&dp->flow_table) < MAX_FLOWS) {
                if (put->stats) {
                    memset(put->stats, 0, sizeof *put->stats);
                }
                error = dp_netdev_flow_add(dp, &match, put->actions,
                                           put->actions_len);
            } else {
                error = EFBIG;
            }
        } else {
            error = ENOENT;
        }
    } else {
        if (put->flags & DPIF_FP_MODIFY
            && flow_equal(&match.flow, &netdev_flow->flow)) {
            struct dp_netdev_actions *new_actions;
            struct dp_netdev_actions *old_actions;

            new_actions = dp_netdev_actions_create(put->actions,
                                                   put->actions_len);

            /* Publish the new actions, keeping the old pointer so that it
             * can be freed through ovsrcu_postpone() below. */
            old_actions = dp_netdev_flow_get_actions(netdev_flow);
            ovsrcu_set(&netdev_flow->actions, new_actions);

            /* Report the stats before optionally zeroing them. */
            if (put->stats) {
                get_dpif_flow_stats(netdev_flow, put->stats);
            }
            if (put->flags & DPIF_FP_ZERO_STATS) {
                clear_stats(netdev_flow);
            }

            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
        } else if (put->flags & DPIF_FP_CREATE) {
            error = EEXIST;
        } else {
            /* Overlapping flow. */
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&dp->flow_mutex);
    miniflow_destroy(&miniflow);

    return error;
}

/* Deletes the flow whose key is given by 'del->key'.
 *
 * If 'del->stats' is nonnull, it receives the flow's stats as read just
 * before removal.  The lookup and removal happen under 'dp->flow_mutex'.
 *
 * Returns 0 on success, the parse error if the key cannot be converted to a
 * flow, or ENOENT if no flow with exactly that key exists. */
static int
dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *target;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    target = dp_netdev_find_flow(dp, &key);
    if (!target) {
        error = ENOENT;
    } else {
        if (del->stats) {
            get_dpif_flow_stats(target, del->stats);
        }
        dp_netdev_remove_flow(dp, target);
    }
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}

/* State of one flow dump, created by dpif_netdev_flow_dump_create() and
 * shared by the dump threads created from it. */
struct dpif_netdev_flow_dump {
    /* Generic part; dpif_netdev_flow_dump_cast() recovers the containing
     * structure from a pointer to this member. */
    struct dpif_flow_dump up;
    /* Cursor handed to cmap_next_position() over 'dp->flow_table'. */
    struct cmap_position pos;
    /* 0 while flows remain; set to EOF by dpif_netdev_flow_dump_next() once
     * the table is exhausted. */
    int status;
    /* Held by dpif_netdev_flow_dump_next() while it reads 'status' and
     * advances 'pos'. */
    struct ovs_mutex mutex;
};

/* Returns the dpif_netdev_flow_dump that embeds 'dump' as its 'up'
 * member. */
static struct dpif_netdev_flow_dump *
dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
{
    struct dpif_netdev_flow_dump *netdev_dump;

    netdev_dump = CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
    return netdev_dump;
}

/* Allocates and initializes a flow dump for 'dpif_': zeroed table position,
 * status 0, and a freshly initialized mutex.  Returns the generic 'up' part
 * of the new dump; release it with dpif_netdev_flow_dump_destroy(). */
static struct dpif_flow_dump *
dpif_netdev_flow_dump_create(const struct dpif *dpif_)
{
    struct dpif_netdev_flow_dump *dump = xmalloc(sizeof *dump);

    dpif_flow_dump_init(&dump->up, dpif_);
    ovs_mutex_init(&dump->mutex);
    dump->status = 0;
    memset(&dump->pos, 0, sizeof dump->pos);

    return &dump->up;
}

/* Destroys the flow dump 'dump_': its mutex is destroyed and its memory
 * freed.  Always returns 0. */
static int
dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
{
    struct dpif_netdev_flow_dump *netdev_dump;

    netdev_dump = dpif_netdev_flow_dump_cast(dump_);
    ovs_mutex_destroy(&netdev_dump->mutex);
    free(netdev_dump);

    return 0;
}

/* Per-thread state of a flow dump, created by
 * dpif_netdev_flow_dump_thread_create(). */
struct dpif_netdev_flow_dump_thread {
    /* Generic part; dpif_netdev_flow_dump_thread_cast() recovers the
     * containing structure from a pointer to this member. */
    struct dpif_flow_dump_thread up;
    /* The dump this thread belongs to. */
    struct dpif_netdev_flow_dump *dump;
    /* Storage for the serialized key and mask of each flow in a batch;
     * dpif_netdev_flow_dump_next() points the returned flows into these. */
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
};

/* Returns the dpif_netdev_flow_dump_thread that embeds 'thread' as its 'up'
 * member. */
static struct dpif_netdev_flow_dump_thread *
dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
{
    struct dpif_netdev_flow_dump_thread *netdev_thread;

    netdev_thread = CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread,
                                 up);
    return netdev_thread;
}

/* Allocates per-thread state for the flow dump 'dump_' and returns its
 * generic 'up' part.  The key and mask buffers are left uninitialized; they
 * are written by dpif_netdev_flow_dump_next(). */
static struct dpif_flow_dump_thread *
dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
{
    struct dpif_netdev_flow_dump *owner = dpif_netdev_flow_dump_cast(dump_);
    struct dpif_netdev_flow_dump_thread *thread = xmalloc(sizeof *thread);

    dpif_flow_dump_thread_init(&thread->up, &owner->up);
    thread->dump = owner;

    return &thread->up;
}

/* Frees the per-thread dump state that embeds 'thread_'. */
static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    free(dpif_netdev_flow_dump_thread_cast(thread_));
}

/* Fetches the next batch of flows for the dump thread 'thread_' and stores
 * them in 'flows'.
 *
 * At most MIN('max_flows', FLOW_DUMP_MAX_BATCH) flows are returned per call.
 * Returns the number of flows stored, which is 0 once the dump's status has
 * been set (to EOF, when the flow table is exhausted).
 *
 * The key and mask of each returned flow point into 'thread->keybuf' and
 * 'thread->maskbuf', so they are overwritten by the next call on the same
 * thread.  The actions pointer refers to the flow's current actions rather
 * than to a copy. */
static int
dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                           struct dpif_flow *flows, int max_flows)
{
    struct dpif_netdev_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_cast(thread_);
    struct dpif_netdev_flow_dump *dump = thread->dump;
    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
    struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
    int n_flows = 0;
    int i;

    /* Phase 1: under the dump mutex, claim a batch of flows by advancing
     * the shared table position. */
    ovs_mutex_lock(&dump->mutex);
    if (!dump->status) {
        for (n_flows = 0; n_flows < MIN(max_flows, FLOW_DUMP_MAX_BATCH);
             n_flows++) {
            struct cmap_node *node;

            node = cmap_next_position(&dp->flow_table, &dump->pos);
            if (!node) {
                dump->status = EOF;
                break;
            }
            netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow,
                                                 node);
        }
    }
    ovs_mutex_unlock(&dump->mutex);

    /* Phase 2: without the dump mutex, serialize each claimed flow into
     * this thread's own buffers. */
    for (i = 0; i < n_flows; i++) {
        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
        struct odputil_keybuf *keybuf = &thread->keybuf[i];
        struct dp_netdev_flow *netdev_flow = netdev_flows[i];
        struct dpif_flow *f = &flows[i];
        struct dp_netdev_actions *dp_actions;
        struct flow_wildcards wc;
        struct ofpbuf buf;

        minimask_expand(&netdev_flow->cr.match.mask, &wc);

        /* Key. */
        ofpbuf_use_stack(&buf, keybuf, sizeof *keybuf);
        odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
                               netdev_flow->flow.in_port.odp_port, true);
        f->key = ofpbuf_data(&buf);
        f->key_len = ofpbuf_size(&buf);

        /* Mask. */
        ofpbuf_use_stack(&buf, maskbuf, sizeof *maskbuf);
        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
                               odp_to_u32(wc.masks.in_port.odp_port),
                               SIZE_MAX, true);
        f->mask = ofpbuf_data(&buf);
        f->mask_len = ofpbuf_size(&buf);

        /* Actions. */
        dp_actions = dp_netdev_flow_get_actions(netdev_flow);
        f->actions = dp_actions->actions;
        f->actions_len = dp_actions->size;

        /* Stats. */
        get_dpif_flow_stats(netdev_flow, &f->stats);
    }

    return n_flows;
}

/* Executes 'execute->actions' on 'execute->packet' with metadata
 * 'execute->md'.
 *
 * Returns EINVAL if the packet is shorter than ETH_HEADER_LEN or longer than
 * UINT16_MAX bytes, otherwise 0.
 *
 * The actions run in the context of the calling thread's pmd, or of the
 * non-pmd context (under 'dp->non_pmd_mutex') when the caller is not a pmd
 * thread.  The ofpbuf is copied into a local dpif_packet before execution
 * and copied back afterwards, so that changes made by the actions reach the
 * caller. */
static int
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_pmd_thread *pmd;
    struct dpif_packet packet, *pp;
    struct pkt_metadata *md = &execute->md;

    if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
        ofpbuf_size(execute->packet) > UINT16_MAX) {
        return EINVAL;
    }

    packet.ofpbuf = *execute->packet;
    pp = &packet;

    /* Tries finding the 'pmd'.  If NULL is returned, that means
     * the current thread is a non-pmd thread and should use
     * dp_netdev_get_nonpmd(). */
    pmd = ovsthread_getspecific(dp->per_pmd_key);
    if (!pmd) {
        pmd = dp_netdev_get_nonpmd(dp);
    }

    /* If the current thread is non-pmd thread, acquires
     * the 'non_pmd_mutex'. */
    if (pmd->core_id == NON_PMD_CORE_ID) {
        ovs_mutex_lock(&dp->non_pmd_mutex);
    }
    /* A batch of one packet, with may_steal == false. */
    dp_netdev_execute_actions(pmd, &pp, 1, false, md, execute->actions,
                              execute->actions_len);
    if (pmd->core_id == NON_PMD_CORE_ID) {
        ovs_mutex_unlock(&dp->non_pmd_mutex);
    }

    /* Even though may_steal is set to false, some actions could modify or
     * reallocate the ofpbuf memory. We need to pass those changes to the
     * caller */
    *execute->packet = packet.ofpbuf;

    return 0;
}

/* Runs each of the 'n_ops' operations in 'ops', in order, against 'dpif'.
 *
 * Each operation is dispatched on its type to the matching handler, and the
 * handler's return value is stored in that operation's 'error' member.  An
 * operation of any other type is left untouched. */
static void
dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
{
    size_t idx = 0;

    while (idx < n_ops) {
        struct dpif_op *op = ops[idx++];

        switch (op->type) {
        case DPIF_OP_FLOW_PUT:
            op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put);
            break;

        case DPIF_OP_FLOW_DEL:
            op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del);
            break;

        case DPIF_OP_FLOW_GET:
            op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get);
            break;

        case DPIF_OP_EXECUTE:
            op->error = dpif_netdev_execute(dpif, &op->u.execute);
            break;
        }
    }
}

/* Maps 'queue_id' to a priority.  For this datapath the mapping is the
 * identity: '*priority' is set to 'queue_id'.  Always returns 0. */
static int
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                              uint32_t queue_id, uint32_t *priority)
{
    uint32_t mapped = queue_id;

    *priority = mapped;
    return 0;
}


/* Creates and returns a new 'struct dp_netdev_actions' whose actions are a
 * copy (made with xmemdup()) of the 'size' bytes of 'actions'.
 *
 * Release the result with dp_netdev_actions_free(). */
struct dp_netdev_actions *
dp_netdev_actions_create(const struct nlattr *actions, size_t size)
{
    struct dp_netdev_actions *copy = xmalloc(sizeof *copy);

    copy->size = size;
    copy->actions = xmemdup(actions, size);

    return copy;
}

/* Returns the current actions of 'flow', read with ovsrcu_get() from its
 * 'actions' member. */
struct dp_netdev_actions *
dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
{
    struct dp_netdev_actions *current;

    current = ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
    return current;
}

/* Frees 'actions' together with the action bytes it owns. */
static void
dp_netdev_actions_free(struct dp_netdev_actions *actions)
{
    void *payload = actions->actions;

    free(payload);
    free(actions);
}


/* Receives one batch of packets from 'rxq', a receive queue of 'port', and
 * feeds it to dp_netdev_input() on behalf of 'pmd'.
 *
 * The per-thread recirculation depth is reset to 0 before the batch is
 * processed.  Receive errors other than EAGAIN and EOPNOTSUPP are logged
 * (rate-limited); EAGAIN and EOPNOTSUPP are ignored silently. */
static void
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_port *port,
                           struct netdev_rxq *rxq)
{
    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    int error, cnt;

    error = netdev_rxq_recv(rxq, packets, &cnt);
    if (!error) {
        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);

        *recirc_depth_get() = 0;
        dp_netdev_input(pmd, packets, cnt, &md);
        return;
    }

    if (error != EAGAIN && error != EOPNOTSUPP) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
                    netdev_get_name(port->netdev), ovs_strerror(error));
    }
}

/* Polls every receive queue of every non-pmd port of the datapath behind
 * 'dpif' once, processing received packets in the non-pmd context.  The
 * whole pass runs under 'dp->non_pmd_mutex'. */
static void
dpif_netdev_run(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_nonpmd(dp);
    struct dp_netdev_port *port;

    ovs_mutex_lock(&dp->non_pmd_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        /* Ports served by pmd threads are not polled here. */
        if (!netdev_is_pmd(port->netdev)) {
            int q = 0;

            while (q < netdev_n_rxq(port->netdev)) {
                dp_netdev_process_rxq_port(non_pmd, port, port->rxq[q]);
                q++;
            }
        }
    }
    ovs_mutex_unlock(&dp->non_pmd_mutex);
}

/* Calls netdev_rxq_wait() on every receive queue of every non-pmd port of
 * the datapath behind 'dpif'.  The walk over the ports is done while holding
 * 'dp_netdev_mutex'. */
static void
dpif_netdev_wait(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;

    ovs_mutex_lock(&dp_netdev_mutex);
    CMAP_FOR_EACH (port, node, &dp->ports) {
        /* Ports served by pmd threads are skipped. */
        if (!netdev_is_pmd(port->netdev)) {
            int q = 0;

            while (q < netdev_n_rxq(port->netdev)) {
                netdev_rxq_wait(port->rxq[q]);
                q++;
            }
        }
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}

/* One entry of a pmd thread's poll list, built by pmd_load_queues(). */
struct rxq_poll {
    /* Port that owns 'rx'.  pmd_load_queues() takes a reference on the port
     * for each entry it adds and drops it when the list is rebuilt. */
    struct dp_netdev_port *port;
    /* The receive queue to poll, one of 'port->rxq[]'. */
    struct netdev_rxq *rx;
};

/* Rebuilds the list of receive queues that 'pmd' must poll.
 *
 * On entry '*ppoll_list' holds 'poll_cnt' entries from a previous call (or
 * none); the port references they hold are dropped.  The list is then
 * refilled, growing it with xrealloc() as needed, and its possibly moved
 * address is stored back in '*ppoll_list'.
 *
 * Receive queues of pmd ports on 'pmd->numa_id' are numbered consecutively
 * across ports, and queue number 'index' is assigned to this thread when
 * 'index' modulo the number of pmd threads on the numa node equals
 * 'pmd->index'.
 *
 * Returns the new number of entries. */
static int
pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
                struct rxq_poll **ppoll_list, int poll_cnt)
{
    struct rxq_poll *poll_list = *ppoll_list;
    struct dp_netdev_port *port;
    int n_pmds_on_numa, index, i;

    /* Simple scheduler for netdev rx polling. */
    /* First release the port references held by the old list. */
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    poll_cnt = 0;
    n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
    index = 0;

    CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
        /* Calls port_try_ref() to prevent the main thread
         * from deleting the port. */
        if (port_try_ref(port)) {
            if (netdev_is_pmd(port->netdev)
                && netdev_get_numa_id(port->netdev) == pmd->numa_id) {
                /* NOTE: this 'i' shadows the function-scope 'i' above. */
                int i;

                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                    if ((index % n_pmds_on_numa) == pmd->index) {
                        poll_list = xrealloc(poll_list,
                                        sizeof *poll_list * (poll_cnt + 1));

                        /* One reference per poll list entry, on top of the
                         * temporary port_try_ref() reference. */
                        port_ref(port);
                        poll_list[poll_cnt].port = port;
                        poll_list[poll_cnt].rx = port->rxq[i];
                        poll_cnt++;
                    }
                    index++;
                }
            }
            /* Unrefs the port_try_ref(). */
            port_unref(port);
        }
    }

    *ppoll_list = poll_list;
    return poll_cnt;
}

/* Body of a pmd thread.  'f_' is the thread's 'struct dp_netdev_pmd_thread'.
 *
 * The thread registers itself in 'per_pmd_key', sets its CPU affinity to
 * 'pmd->core_id', and then repeatedly (re)loads its poll list and polls the
 * listed receive queues in a busy loop.  A change of 'pmd->change_seq' makes
 * it leave the polling loop; it then either reloads (exit latch not set) or
 * releases its poll list and returns NULL (exit latch set). */
static void *
pmd_thread_main(void *f_)
{
    struct dp_netdev_pmd_thread *pmd = f_;
    unsigned int lc = 0;
    struct rxq_poll *poll_list;
    unsigned int port_seq = PMD_INITIAL_SEQ;
    int poll_cnt;
    int i;

    poll_cnt = 0;
    poll_list = NULL;

    /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
    ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
    pmd_thread_setaffinity_cpu(pmd->core_id);
reload:
    /* Each (re)load starts with a freshly initialized flow cache and a
     * rebuilt poll list. */
    emc_cache_init(&pmd->flow_cache);
    poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);

    for (;;) {
        /* NOTE: this 'i' shadows the function-scope 'i' above. */
        int i;

        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(pmd, poll_list[i].port, poll_list[i].rx);
        }

        /* Only once 'lc' has exceeded 1024 polling passes: quiesce for RCU
         * and check whether a reload or exit has been requested. */
        if (lc++ > 1024) {
            unsigned int seq;

            lc = 0;

            ovsrcu_quiesce();

            atomic_read_relaxed(&pmd->change_seq, &seq);
            if (seq != port_seq) {
                port_seq = seq;
                break;
            }
        }
    }

    emc_cache_uninit(&pmd->flow_cache);

    if (!latch_is_set(&pmd->exit_latch)){
        goto reload;
    }

    /* Exiting: drop the port references held by the poll list. */
    for (i = 0; i < poll_cnt; i++) {
         port_unref(poll_list[i].port);
    }

    free(poll_list);
    return NULL;
}

/* Takes 'dp->upcall_rwlock' for writing.  The lock stays held on return, as
 * the OVS_ACQUIRES annotation states; dp_netdev_enable_upcall() releases
 * it. */
static void
dp_netdev_disable_upcall(struct dp_netdev *dp)
    OVS_ACQUIRES(dp->upcall_rwlock)
{
    fat_rwlock_wrlock(&dp->upcall_rwlock);
}

/* dpif entry point: calls dp_netdev_disable_upcall() on the datapath behind
 * 'dpif'. */
static void
dpif_netdev_disable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    dp_netdev_disable_upcall(get_dp_netdev(dpif));
}

/* Releases 'dp->upcall_rwlock', which the caller must hold (see the
 * OVS_RELEASES annotation); the counterpart of
 * dp_netdev_disable_upcall(). */
static void
dp_netdev_enable_upcall(struct dp_netdev *dp)
    OVS_RELEASES(dp->upcall_rwlock)
{
    fat_rwlock_unlock(&dp->upcall_rwlock);
}

/* dpif entry point: calls dp_netdev_enable_upcall() on the datapath behind
 * 'dpif'. */
static void
dpif_netdev_enable_upcall(struct dpif *dpif)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    dp_netdev_enable_upcall(get_dp_netdev(dpif));
}

/* Returns the dp_netdev_pmd_thread that stands in for non-pmd threads, i.e.
 * the entry of 'dp->poll_threads' stored under NON_PMD_CORE_ID.  Asserts
 * that the entry exists. */
static struct dp_netdev_pmd_thread *
dp_netdev_get_nonpmd(struct dp_netdev *dp)
{
    struct cmap_node *pnode;

    pnode = cmap_find(&dp->poll_threads, hash_int(NON_PMD_CORE_ID, 0));
    ovs_assert(pnode);

    return CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);
}

/* Initializes 'pmd' from the given arguments and inserts it into
 * 'dp->poll_threads', keyed by the hash of 'core_id'.
 *
 * Also sets up the exit latch and the change sequence.  For the
 * NON_PMD_CORE_ID entry the flow cache is initialized here, because no
 * actual thread is created for it. */
static void
dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
                        int index, int core_id, int numa_id)
{
    struct cmap_node *node = CONST_CAST(struct cmap_node *, &pmd->node);

    pmd->dp = dp;
    pmd->index = index;
    pmd->core_id = core_id;
    pmd->numa_id = numa_id;

    latch_init(&pmd->exit_latch);
    atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);

    if (core_id == NON_PMD_CORE_ID) {
        emc_cache_init(&pmd->flow_cache);
    }

    /* Publish 'pmd' only after it is fully set up. */
    cmap_insert(&dp->poll_threads, node, hash_int(core_id, 0));
}

/* Stops the pmd thread, removes it from the 'dp->poll_threads'
 * and destroys the struct.
 *
 * For the NON_PMD_CORE_ID entry there is no thread to stop, so only its flow
 * cache is uninitialized.  For a real pmd thread the exit latch is set, a
 * reload is triggered, the core is unpinned and the thread is joined.  In
 * both cases 'pmd' is freed before returning and must not be used
 * afterwards. */
static void
dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
{
    /* Uninit the 'flow_cache' since there is
     * no actual thread uninit it. */
    if (pmd->core_id == NON_PMD_CORE_ID) {
        emc_cache_uninit(&pmd->flow_cache);
    } else {
        /* Set the latch before triggering the reload: pmd_thread_main()
         * checks the latch after it leaves its polling loop. */
        latch_set(&pmd->exit_latch);
        dp_netdev_reload_pmd__(pmd);
        ovs_numa_unpin_core(pmd->core_id);
        xpthread_join(pmd->thread, NULL);
    }
    cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
    latch_destroy(&pmd->exit_latch);
    free(pmd);
}

/* Deletes every entry of 'dp->poll_threads' via dp_netdev_del_pmd(). */
static void
dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
{
    struct dp_netdev_pmd_thread *victim;

    CMAP_FOR_EACH (victim, node, &dp->poll_threads) {
        dp_netdev_del_pmd(victim);
    }
}

/* Deletes those entries of 'dp->poll_threads' whose 'numa_id' equals
 * 'numa_id'; all other entries are left alone. */
static void
dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
{
    struct dp_netdev_pmd_thread *candidate;

    CMAP_FOR_EACH (candidate, node, &dp->poll_threads) {
        if (candidate->numa_id != numa_id) {
            continue;
        }
        dp_netdev_del_pmd(candidate);
    }
}

/* Starts pmd threads for numa node 'numa_id' of datapath 'dp', unless some
 * already exist for that node.
 *
 * Logs an error and does nothing if 'numa_id' is not a valid numa node id or
 * if the node has no unpinned cores left.  Otherwise creates
 * MIN(unpinned cores on the node, NR_PMD_THREADS) threads, each configured
 * with its own core id obtained from ovs_numa_get_unpinned_core_on_numa(). */
static void
dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
{
    int can_have, n_unpinned, i;

    if (!ovs_numa_numa_id_is_valid(numa_id)) {
        /* The two literals are concatenated by the compiler, so the space
         * separating them has to be part of one of them. */
        VLOG_ERR("Cannot create pmd threads due to numa id (%d) "
                 "invalid", numa_id);
        return;
    }

    /* If there are already pmd threads created for the numa node, there is
     * nothing to do. */
    if (get_n_pmd_threads_on_numa(dp, numa_id)) {
        return;
    }

    n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
    if (!n_unpinned) {
        VLOG_ERR("Cannot create pmd threads due to out of unpinned "
                 "cores on numa node");
        return;
    }

    /* Tries creating NR_PMD_THREADS pmd threads on the numa node. */
    can_have = MIN(n_unpinned, NR_PMD_THREADS);
    for (i = 0; i < can_have; i++) {
        struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
        int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);

        dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
        /* Each thread will distribute all devices rx-queues among
         * themselves. */
        pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
    }
    VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
}


/* Bucket constructor handed to ovsthread_stats_bucket_get() for per-flow
 * statistics: returns a zeroed, cacheline-allocated dp_netdev_flow_stats
 * whose mutex has been initialized. */
static void *
dp_netdev_flow_stats_new_cb(void)
{
    struct dp_netdev_flow_stats *stats;

    stats = xzalloc_cacheline(sizeof *stats);
    ovs_mutex_init(&stats->mutex);

    return stats;
}

/* Accounts 'cnt' packets totalling 'size' bytes, with the OR of their TCP
 * flags in 'tcp_flags', to the calling thread's statistics bucket of
 * 'netdev_flow'.  The bucket's 'used' time only ever moves forward. */
static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
                    int cnt, int size,
                    uint16_t tcp_flags)
{
    long long int now = time_msec();
    struct dp_netdev_flow_stats *stats;

    stats = ovsthread_stats_bucket_get(&netdev_flow->stats,
                                       dp_netdev_flow_stats_new_cb);

    ovs_mutex_lock(&stats->mutex);
    if (now > stats->used) {
        stats->used = now;
    }
    stats->tcp_flags |= tcp_flags;
    stats->byte_count += size;
    stats->packet_count += cnt;
    ovs_mutex_unlock(&stats->mutex);
}

/* Bucket constructor handed to ovsthread_stats_bucket_get() for datapath
 * statistics: returns a zeroed, cacheline-allocated dp_netdev_stats whose
 * mutex has been initialized. */
static void *
dp_netdev_stats_new_cb(void)
{
    struct dp_netdev_stats *stats;

    stats = xzalloc_cacheline(sizeof *stats);
    ovs_mutex_init(&stats->mutex);

    return stats;
}

/* Adds 'cnt' to counter 'type' in the calling thread's statistics bucket of
 * 'dp', under the bucket's mutex. */
static void
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
{
    struct dp_netdev_stats *stats
        = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);

    ovs_mutex_lock(&stats->mutex);
    stats->n[type] += cnt;
    ovs_mutex_unlock(&stats->mutex);
}

/* Passes 'packet_', described by 'flow', to the upcall callback registered
 * on 'dp' and returns the callback's result.
 *
 * 'type' is the kind of upcall; DPIF_UC_MISS upcalls are counted in
 * DP_STAT_MISS.  'userdata', 'actions', 'wc' and 'put_actions' are handed to
 * the callback unchanged.  'wc' may be NULL: dp_execute_cb() passes NULL for
 * DPIF_UC_ACTION upcalls.
 *
 * Returns ENODEV if no callback has been registered.  When debug logging is
 * enabled (and not rate limited) the flow key and the packet are logged
 * before the callback is invoked. */
static int
dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
                 struct flow *flow, struct flow_wildcards *wc,
                 enum dpif_upcall_type type, const struct nlattr *userdata,
                 struct ofpbuf *actions, struct ofpbuf *put_actions)
{
    struct ofpbuf *packet = &packet_->ofpbuf;

    if (type == DPIF_UC_MISS) {
        dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
    }

    if (OVS_UNLIKELY(!dp->upcall_cb)) {
        return ENODEV;
    }

    if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        struct ofpbuf key;
        char *packet_str;

        ofpbuf_init(&key, 0);
        /* Do not form '&wc->masks' from a null 'wc': member access through a
         * null pointer is undefined behavior. */
        odp_flow_key_from_flow(&key, flow, wc ? &wc->masks : NULL,
                               flow->in_port.odp_port, true);

        packet_str = ofp_packet_to_string(ofpbuf_data(packet),
                                          ofpbuf_size(packet));

        odp_flow_key_format(ofpbuf_data(&key), ofpbuf_size(&key), &ds);

        VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
                 dpif_upcall_type_to_string(type), ds_cstr(&ds), packet_str);

        ofpbuf_uninit(&key);
        free(packet_str);
        ds_destroy(&ds);
    }

    return dp->upcall_cb(packet, flow, type, userdata, actions, wc,
                         put_actions, dp->upcall_aux);
}

/* Returns the datapath hash stored in 'packet'.  If the stored hash is zero,
 * computes a 5-tuple hash from 'mf', stores it in 'packet' and returns it. */
static inline uint32_t
dpif_netdev_packet_get_dp_hash(struct dpif_packet *packet,
                               const struct miniflow *mf)
{
    uint32_t stored = dpif_packet_get_dp_hash(packet);

    if (OVS_LIKELY(stored)) {
        return stored;
    }

    stored = miniflow_hash_5tuple(mf, 0);
    dpif_packet_set_dp_hash(packet, stored);
    return stored;
}

/* A group of packets that matched the same flow, collected so that the
 * flow's statistics and actions can be handled once for the whole group
 * (see packet_batch_execute()). */
struct packet_batch {
    /* Number of entries of 'packets' in use. */
    unsigned int packet_count;
    /* Sum of the sizes of the packets added so far. */
    unsigned int byte_count;
    /* OR of the TCP flags of the packets added so far. */
    uint16_t tcp_flags;

    /* Flow that every packet in this batch matched. */
    struct dp_netdev_flow *flow;

    /* The batched packets; only the first 'packet_count' are valid. */
    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    /* Copy of the metadata passed to packet_batch_init(). */
    struct pkt_metadata md;
};

/* Appends 'packet' to 'batch' and folds its size and the TCP flags found in
 * 'mf' into the batch totals.  The caller must ensure there is room left in
 * 'batch->packets'. */
static inline void
packet_batch_update(struct packet_batch *batch, struct dpif_packet *packet,
                    const struct miniflow *mf)
{
    const unsigned int slot = batch->packet_count;

    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
    batch->packets[slot] = packet;
    batch->packet_count = slot + 1;
    batch->byte_count += ofpbuf_size(&packet->ofpbuf);
}

/* Resets 'batch' to an empty batch for 'flow', keeping a copy of '*md'. */
static inline void
packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow,
                  struct pkt_metadata *md)
{
    /* Start with empty totals. */
    batch->tcp_flags = 0;
    batch->byte_count = 0;
    batch->packet_count = 0;

    batch->md = *md;
    batch->flow = flow;
}

/* Processes a complete 'batch' on behalf of 'pmd': updates the statistics of
 * the batch's flow, executes the flow's actions on all batched packets with
 * 'may_steal' set to true, and counts the packets as DP_STAT_HIT. */
static inline void
packet_batch_execute(struct packet_batch *batch,
                     struct dp_netdev_pmd_thread *pmd)
{
    struct dp_netdev_flow *flow = batch->flow;
    struct dp_netdev_actions *flow_actions;

    dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count,
                        batch->tcp_flags);

    flow_actions = dp_netdev_flow_get_actions(flow);
    dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true,
                              &batch->md, flow_actions->actions,
                              flow_actions->size);

    dp_netdev_count_packet(pmd->dp, DP_STAT_HIT, batch->packet_count);
}

/* Adds 'pkt' to the batch for 'flow' among the first '*n_batches' entries of
 * 'batches', starting a new batch (initialized with 'md') if there is none
 * yet.
 *
 * Returns true if the packet was queued.  Returns false, leaving everything
 * untouched, if 'flow' is NULL or if a new batch would be needed but
 * 'max_batches' batches are already in use. */
static inline bool
dp_netdev_queue_batches(struct dpif_packet *pkt, struct pkt_metadata *md,
                        struct dp_netdev_flow *flow, const struct miniflow *mf,
                        struct packet_batch *batches, size_t *n_batches,
                        size_t max_batches)
{
    struct packet_batch *slot;
    size_t k;

    if (OVS_UNLIKELY(!flow)) {
        return false;
    }

    /* XXX: This O(n^2) algorithm makes sense if we're operating under the
     * assumption that the number of distinct flows (and therefore the
     * number of distinct batches) is quite small.  If this turns out not
     * to be the case, it may make sense to pre sort based on the
     * netdev_flow pointer.  That done we can get the appropriate batching
     * in O(n * log(n)) instead. */
    for (k = *n_batches; k > 0; k--) {
        /* Scan from the most recently created batch backwards. */
        slot = &batches[k - 1];
        if (slot->flow == flow) {
            packet_batch_update(slot, pkt, mf);
            return true;
        }
    }

    if (OVS_UNLIKELY(*n_batches >= max_batches)) {
        return false;
    }

    slot = &batches[*n_batches];
    (*n_batches)++;
    packet_batch_init(slot, flow, md);
    packet_batch_update(slot, pkt, mf);
    return true;
}

/* Exchanges the packet pointers stored in '*a' and '*b'. */
static inline void
dpif_packet_swap(struct dpif_packet **a, struct dpif_packet **b)
{
    struct dpif_packet *from_a = *a;
    struct dpif_packet *from_b = *b;

    *a = from_b;
    *b = from_a;
}

/* Runs the 'cnt' packets in 'packets' through the exact match cache of
 * 'pmd'.  Packets shorter than an Ethernet header are deleted.  Packets whose
 * flow is found in the cache are grouped per flow (at most 4 groups) and the
 * groups are executed before returning.
 *
 * Every other packet, i.e. one with no cached flow or one for which no batch
 * was available, has its key copied into 'keys' and its pointer moved towards
 * the front of 'packets'.
 *
 * Returns the number of such packets; they occupy the first entries of
 * 'packets', with matching keys at the same indexes of 'keys'. */
static inline size_t
emc_processing(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **packets,
               size_t cnt, struct pkt_metadata *md,
               struct netdev_flow_key *keys)
{
    struct emc_cache *flow_cache = &pmd->flow_cache;
    struct packet_batch batches[4];
    struct netdev_flow_key key;
    size_t n_batches = 0;
    size_t n_missed = 0;
    size_t i;

    miniflow_initialize(&key.flow, key.buf);
    for (i = 0; i < cnt; i++) {
        struct dp_netdev_flow *flow;
        uint32_t hash;
        bool queued;

        if (OVS_UNLIKELY(ofpbuf_size(&packets[i]->ofpbuf) < ETH_HEADER_LEN)) {
            dpif_packet_delete(packets[i]);
            continue;
        }

        miniflow_extract(&packets[i]->ofpbuf, md, &key.flow);
        hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.flow);
        flow = emc_lookup(flow_cache, &key.flow, hash);

        queued = dp_netdev_queue_batches(packets[i], md, flow, &key.flow,
                                         batches, &n_batches,
                                         ARRAY_SIZE(batches));
        if (OVS_LIKELY(queued)) {
            continue;
        }

        /* Not handled here: compact the packet and its key to the front. */
        if (i != n_missed) {
            dpif_packet_swap(&packets[i], &packets[n_missed]);
        }
        keys[n_missed] = key;
        n_missed++;
    }

    for (i = 0; i < n_batches; i++) {
        packet_batch_execute(&batches[i], pmd);
    }

    return n_missed;
}

/* Handles the 'cnt' packets in 'packets' using the classifier 'pmd->dp->cls'.
 * 'keys[i].flow' is the lookup key for 'packets[i]'.
 *
 * Packets whose lookup misses go through dp_netdev_upcall(), provided that
 * 'dp->upcall_rwlock' can be taken for reading.  The actions produced by the
 * upcall are executed right away and, unless the upcall returned ENOSPC or
 * the flow is found to exist already, a flow is added to the datapath.
 *
 * Packets that have a rule are inserted into the exact match cache of 'pmd',
 * grouped per flow and executed in batches. */
static inline void
fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                     struct dpif_packet **packets, size_t cnt,
                     struct pkt_metadata *md, struct netdev_flow_key *keys)
{
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length array. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
#endif
    struct packet_batch batches[PKT_ARRAY_SIZE];
    const struct miniflow *mfs[PKT_ARRAY_SIZE]; /* NULL at bad packets. */
    struct cls_rule *rules[PKT_ARRAY_SIZE];
    struct dp_netdev *dp = pmd->dp;
    struct emc_cache *flow_cache = &pmd->flow_cache;
    size_t n_batches, i;
    bool any_miss;

    /* Collect pointers to the miniflows for the batched classifier lookup. */
    for (i = 0; i < cnt; i++) {
        mfs[i] = &keys[i].flow;
    }
    /* NOTE(review): presumably the lookup returns false when at least one key
     * found no rule, leaving the corresponding 'rules[i]' NULL -- confirm
     * against the classifier. */
    any_miss = !classifier_lookup_miniflow_batch(&dp->cls, mfs, rules, cnt);
    /* A zero result from fat_rwlock_tryrdlock() means the read lock was
     * obtained; it is released at the end of this branch. */
    if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
        uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
        struct ofpbuf actions, put_actions;
        struct match match;

        ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
        ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);

        for (i = 0; i < cnt; i++) {
            const struct dp_netdev_flow *netdev_flow;
            struct ofpbuf *add_actions;
            int error;

            /* Only packets without a rule need an upcall. */
            if (OVS_LIKELY(rules[i] || !mfs[i])) {
                continue;
            }

            /* It's possible that an earlier slow path execution installed
             * the rule this flow needs.  In this case, it's a lot cheaper
             * to catch it here than execute a miss. */
            netdev_flow = dp_netdev_lookup_flow(dp, mfs[i]);
            if (netdev_flow) {
                rules[i] = CONST_CAST(struct cls_rule *, &netdev_flow->cr);
                continue;
            }

            miniflow_expand(mfs[i], &match.flow);

            /* The action buffers are reused for every upcall. */
            ofpbuf_clear(&actions);
            ofpbuf_clear(&put_actions);

            error = dp_netdev_upcall(dp, packets[i], &match.flow, &match.wc,
                                      DPIF_UC_MISS, NULL, &actions,
                                      &put_actions);
            /* On any error other than ENOSPC give up on this packet;
             * 'rules[i]' stays NULL, so the batching loop below skips it. */
            if (OVS_UNLIKELY(error && error != ENOSPC)) {
                continue;
            }

            /* We can't allow the packet batching in the next loop to execute
             * the actions.  Otherwise, if there are any slow path actions,
             * we'll send the packet up twice. */
            /* NOTE(review): 'may_steal' is false here, so the packet is not
             * consumed by the actions and nothing later in this function
             * frees it -- confirm who owns packets that took the miss
             * path. */
            dp_netdev_execute_actions(pmd, &packets[i], 1, false, md,
                                      ofpbuf_data(&actions),
                                      ofpbuf_size(&actions));

            /* Install 'put_actions' if the upcall filled them in, otherwise
             * the actions that were just executed. */
            add_actions = ofpbuf_size(&put_actions)
                ? &put_actions
                : &actions;

            ovs_mutex_lock(&dp->flow_mutex);
            /* XXX: There's a brief race where this flow could have already
             * been installed since we last did the flow lookup.  This could be
             * solved by moving the mutex lock outside the loop, but that's an
             * awful long time to be locking everyone out of making flow
             * installs.  If we move to a per-core classifier, it would be
             * reasonable. */
            if (OVS_LIKELY(error != ENOSPC)
                && !dp_netdev_lookup_flow(dp, mfs[i])) {
                dp_netdev_flow_add(dp, &match, ofpbuf_data(add_actions),
                                   ofpbuf_size(add_actions));
            }
            ovs_mutex_unlock(&dp->flow_mutex);
        }

        ofpbuf_uninit(&actions);
        ofpbuf_uninit(&put_actions);
        fat_rwlock_unlock(&dp->upcall_rwlock);
    }

    /* Batch and execute every packet that has a rule, remembering its flow
     * in the exact match cache. */
    n_batches = 0;
    for (i = 0; i < cnt; i++) {
        struct dpif_packet *packet = packets[i];
        struct dp_netdev_flow *flow;

        /* Packets without a rule are skipped here. */
        if (OVS_UNLIKELY(!rules[i] || !mfs[i])) {
            continue;
        }

        flow = dp_netdev_flow_cast(rules[i]);
        emc_insert(flow_cache, mfs[i], dpif_packet_get_dp_hash(packet),
                   flow);
        dp_netdev_queue_batches(packet, md, flow, mfs[i], batches, &n_batches,
                                ARRAY_SIZE(batches));
    }

    for (i = 0; i < n_batches; i++) {
        packet_batch_execute(&batches[i], pmd);
    }
}

/* Entry point for processing 'cnt' received 'packets' with metadata 'md' on
 * behalf of 'pmd'.  The exact match cache is tried first; whatever it could
 * not handle is passed on to fast_path_processing(). */
static void
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
                struct dpif_packet **packets, int cnt, struct pkt_metadata *md)
{
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length array. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
#endif
    struct netdev_flow_key keys[PKT_ARRAY_SIZE];
    size_t n_remaining;

    n_remaining = emc_processing(pmd, packets, cnt, md, keys);
    if (OVS_LIKELY(!n_remaining)) {
        return;
    }

    fast_path_processing(pmd, packets, n_remaining, md, keys);
}

/* Auxiliary data that dp_netdev_execute_actions() passes through
 * odp_execute_actions() to dp_execute_cb(). */
struct dp_netdev_execute_aux {
    /* Thread context on whose behalf the actions are executed. */
    struct dp_netdev_pmd_thread *pmd;
};

/* 'register_upcall_cb' entry of dpif_netdev_class: records 'cb' and 'aux' in
 * the datapath behind 'dpif'.  dp_netdev_upcall() later invokes 'cb' with
 * 'aux' as its last argument. */
static void
dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
                               void *aux)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    /* NOTE(review): 'upcall_aux' is stored before 'upcall_cb'; presumably so
     * that a reader that observes the callback also finds its aux -- no
     * barrier is visible here, confirm whether ordering is relied upon. */
    dp->upcall_aux = aux;
    dp->upcall_cb = cb;
}

/* Datapath-specific action callback that dp_netdev_execute_actions() hands
 * to odp_execute_actions().  Executes the single action 'a' on the 'cnt'
 * packets in 'packets' with metadata 'md'; 'aux_' is a
 * 'struct dp_netdev_execute_aux' carrying the pmd thread context.
 *
 * Handles the output, userspace, hash and recirculation actions; any other
 * action type reaching this function is treated as unreachable.
 *
 * For the output, userspace and recirculation actions, 'may_steal' set to
 * true means that this function consumes the packets: they are either handed
 * on or deleted, including on the paths where the action cannot be carried
 * out (unknown output port, upcalls disabled, recirculation too deep). */
static void
dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt,
              struct pkt_metadata *md,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    uint32_t *depth = recirc_depth_get();
    struct dp_netdev_pmd_thread *pmd = aux->pmd;
    struct dp_netdev *dp = pmd->dp;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;
    int i;

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
        if (OVS_LIKELY(p)) {
            netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal);
        } else if (may_steal) {
            /* No such port: drop the packets we own. */
            for (i = 0; i < cnt; i++) {
                dpif_packet_delete(packets[i]);
            }
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE:
        if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
            const struct nlattr *userdata;
            struct ofpbuf actions;
            struct flow flow;

            userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
            ofpbuf_init(&actions, 0);

            for (i = 0; i < cnt; i++) {
                int error;

                ofpbuf_clear(&actions);

                flow_extract(&packets[i]->ofpbuf, md, &flow);
                error = dp_netdev_upcall(dp, packets[i], &flow, NULL,
                                         DPIF_UC_ACTION, userdata, &actions,
                                         NULL);
                if (!error || error == ENOSPC) {
                    /* The packet is deleted below when we own it, so the
                     * nested execution must not steal it. */
                    dp_netdev_execute_actions(pmd, &packets[i], 1, false, md,
                                              ofpbuf_data(&actions),
                                              ofpbuf_size(&actions));
                }

                if (may_steal) {
                    dpif_packet_delete(packets[i]);
                }
            }
            ofpbuf_uninit(&actions);
            fat_rwlock_unlock(&dp->upcall_rwlock);
        } else if (may_steal) {
            /* Upcalls are disabled, so the packets cannot be sent up.  We
             * still own them and must not leak them; drop them, as the
             * output and recirculation cases do when they cannot proceed. */
            for (i = 0; i < cnt; i++) {
                dpif_packet_delete(packets[i]);
            }
        }
        break;

    case OVS_ACTION_ATTR_HASH: {
        const struct ovs_action_hash *hash_act;
        uint32_t hash;

        hash_act = nl_attr_get(a);

        for (i = 0; i < cnt; i++) {

            if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
                /* Hash need not be symmetric, nor does it need to include
                 * L2 fields. */
                hash = hash_2words(dpif_packet_get_dp_hash(packets[i]),
                                   hash_act->hash_basis);
            } else {
                VLOG_WARN("Unknown hash algorithm specified "
                          "for the hash action.");
                hash = 2;
            }

            if (!hash) {
                hash = 1; /* 0 is not valid */
            }

            /* The shared metadata carries the hash of the first packet. */
            if (i == 0) {
                md->dp_hash = hash;
            }
            dpif_packet_set_dp_hash(packets[i], hash);
        }
        break;
    }

    case OVS_ACTION_ATTR_RECIRC:
        if (*depth >= MAX_RECIRC_DEPTH) {
            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
            if (may_steal) {
                for (i = 0; i < cnt; i++) {
                    dpif_packet_delete(packets[i]);
                }
            }
            break;
        }

        (*depth)++;
        for (i = 0; i < cnt; i++) {
            struct dpif_packet *recirc_pkt;
            struct pkt_metadata recirc_md = *md;

            /* Recirculate the packet itself when we own it, a clone
             * otherwise. */
            recirc_pkt = (may_steal) ? packets[i]
                                : dpif_packet_clone(packets[i]);

            recirc_md.recirc_id = nl_attr_get_u32(a);

            /* Hash is private to each packet */
            recirc_md.dp_hash = dpif_packet_get_dp_hash(packets[i]);

            dp_netdev_input(pmd, &recirc_pkt, 1,
                            &recirc_md);
        }
        (*depth)--;
        break;

    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SET_MASKED:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }
}

static void
dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                          struct dpif_packet **packets, int cnt,
                          bool may_steal, struct pkt_metadata *md,
                          const struct nlattr *actions, size_t actions_len)
{
    struct dp_netdev_execute_aux aux = {pmd};

    odp_execute_actions(&aux, packets, cnt, may_steal, md, actions,
                        actions_len, dp_execute_cb);
}

/* The "netdev" datapath provider.
 *
 * This is a positional initializer, so each entry fills the next member of
 * 'struct dpif_class' in declaration order; entries must not be reordered.
 * NULL entries are operations this provider does not supply.
 *
 * dpif_dummy_register__() copies this table to create dummy providers. */
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_operate,
    NULL,                       /* recv_set */
    NULL,                       /* handlers_set */
    dpif_netdev_queue_to_priority,
    NULL,                       /* recv */
    NULL,                       /* recv_wait */
    NULL,                       /* recv_purge */
    dpif_netdev_register_upcall_cb,
    dpif_netdev_enable_upcall,
    dpif_netdev_disable_upcall,
};

/* unixctl handler for "dpif-dummy/change-port-number DP PORT NEW-NUMBER".
 *
 * Moves the port named 'argv[2]' of dummy datapath 'argv[1]' to port number
 * 'argv[3]'.  Replies with an error if the datapath is unknown or not a
 * dummy, if the port is unknown, if the new number is zero or ODPP_NONE, or
 * if the new number is already in use. */
static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *old_port;
    struct dp_netdev *dp;

    /* Find the datapath and take a reference while holding the global
     * mutex. */
    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (dp && dpif_netdev_class_is_dummy(dp->class)) {
        ovs_refcount_ref(&dp->ref_cnt);
    } else {
        dp = NULL;
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    if (!dp) {
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &old_port)) {
        unixctl_command_reply_error(conn, "unknown port");
    } else {
        const odp_port_t port_no = u32_to_odp(atoi(argv[3]));

        if (!port_no || port_no == ODPP_NONE) {
            unixctl_command_reply_error(conn, "bad port number");
        } else if (dp_netdev_lookup_port(dp, port_no)) {
            unixctl_command_reply_error(conn, "port number already in use");
        } else {
            struct dp_netdev_port *new_port;

            /* Remove old port.  Freeing is postponed, so 'old_port' can
             * still be copied below. */
            cmap_remove(&dp->ports, &old_port->node,
                        hash_port_no(old_port->port_no));
            ovsrcu_postpone(free, old_port);

            /* Insert new port (cmap semantics mean we cannot re-insert
             * 'old_port'). */
            new_port = xmemdup(old_port, sizeof *old_port);
            new_port->port_no = port_no;
            cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));

            seq_change(dp->port_seq);
            unixctl_command_reply(conn, NULL);
        }
    }
    ovs_mutex_unlock(&dp->port_mutex);

    dp_netdev_unref(dp);
}

/* unixctl handler for "dpif-dummy/delete-port DP PORT".
 *
 * Deletes the port named 'argv[2]' from dummy datapath 'argv[1]'.  Replies
 * with an error if the datapath is unknown or not a dummy, if the port is
 * unknown, or if the port is the local port. */
static void
dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
                       const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp;

    /* Find the datapath and take a reference while holding the global
     * mutex. */
    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (dp && dpif_netdev_class_is_dummy(dp->class)) {
        ovs_refcount_ref(&dp->ref_cnt);
    } else {
        dp = NULL;
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    if (!dp) {
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &port)) {
        unixctl_command_reply_error(conn, "unknown port");
    } else if (port->port_no != ODPP_LOCAL) {
        do_del_port(dp, port);
        unixctl_command_reply(conn, NULL);
    } else {
        unixctl_command_reply_error(conn, "can't delete local port");
    }
    ovs_mutex_unlock(&dp->port_mutex);

    dp_netdev_unref(dp);
}

/* Registers a new datapath provider that is a heap-allocated copy of
 * dpif_netdev_class with its type name replaced by a copy of 'type'. */
static void
dpif_dummy_register__(const char *type)
{
    struct dpif_class *dummy_class = xmalloc(sizeof *dummy_class);

    *dummy_class = dpif_netdev_class;
    dummy_class->type = xstrdup(type);

    dp_register_provider(dummy_class);
}

void
dpif_dummy_register(bool override)
{
    if (override) {
        struct sset types;
        const char *type;

        sset_init(&types);
        dp_enumerate_types(&types);
        SSET_FOR_EACH (type, &types) {
            if (!dp_unregister_provider(type)) {
                dpif_dummy_register__(type);
            }
        }
        sset_destroy(&types);
    }

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "dp port new-number",
                             3, 3, dpif_dummy_change_port_number, NULL);
    unixctl_command_register("dpif-dummy/delete-port", "dp port",
                             2, 2, dpif_dummy_delete_port, NULL);
}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/*
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								#include <config.h>
-												netdev-dpdk: Fix race condition with DPDK mempools in non pmd threads

DPDK mempools rely on rte_lcore_id() to implement a thread-local cache.
Our non pmd threads had rte_lcore_id() == 0. This allowed concurrent access to
the "thread-local" cache, causing crashes.

This commit resolves the issue with the following changes:

- Every non pmd thread has the same lcore_id (0, for management reasons), which
  is not shared with any pmd thread (lcore_ids for pmd threads now start from 1)
- DPDK mbufs must be allocated/freed in pmd threads. When there is the need to
  use mempools in non pmd threads, like in dpdk_do_tx_copy(), a mutex must be
  held.
- The previous change no longer allows us to pass DPDK mbufs to handler
  threads; therefore this commit partially reverts 143859ec63d45e. Now packets
  are copied for upcall processing. We can remove the extra memcpy by
  processing upcalls in the pmd thread itself.

With the introduction of the extra locking, the packet throughput will be lower
in the following cases:

- When using internal (tap) devices with DPDK devices on the same datapath.
  Anyway, to support internal devices efficiently, we needed DPDK KNI devices,
  which will be proper pmd devices and will not need this locking.
- When packets are processed in the slow path by non pmd threads. This overhead
  can be avoided by handling the upcalls directly in pmd threads (a change that
  has already been proposed by Ryan Wilson)

Also, the following two fixes have been introduced:
- In dpdk_free_buf() use rte_pktmbuf_free_seg() instead of rte_mempool_put().
  This allows OVS to run properly with CONFIG_RTE_LIBRTE_MBUF_DEBUG DPDK option
- Do not bulk free mbufs in a transmission queue. They may belong to different
  mempools

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-07-17 14:29:36 -07:00
+								#include "dpif-netdev.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								#include <ctype.h>
 								#include <errno.h>
 								#include <fcntl.h>
 								#include <inttypes.h>
 								#include <netinet/in.h>
-												Always #include <sys/socket.h> before <net/if.h>.

FreeBSD 8.0's <net/if.h> requires <sys/socket.h> to be included first,
even though I don't see any such requirement in POSIX.

											
										
										
											2010-05-26 10:05:19 -07:00
+								#include <sys/socket.h>
-												Work around bugs in system headers.

On some systems, at least, one must include <sys/types.h> before
<netinet/in.h>, and <netinet/in.h> before <arpa/inet.h> or <net/if.h>.

From Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-12 12:51:36 -08:00
+								#include <net/if.h>
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								#include <stdint.h>
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include <stdlib.h>
 								#include <string.h>
 								#include <sys/ioctl.h>
 								#include <sys/stat.h>
 								#include <unistd.h>
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								#include "classifier.h"
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								#include "cmap.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "csum.h"
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								#include "dpif.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "dpif-provider.h"
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								#include "dummy.h"
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								#include "dynamic-string.h"
-												lib/classifier: Lockless lookups.

Now that all the relevant classifier structures use RCU and internal
mutual exclusion for modifications, we can remove the fat-rwlock and
thus make the classifier lookups lockless.

As the readers are operating concurrently with the writers, a
concurrent reader may or may not see a new rule being added by a
writer, depending on how the concurrent events overlap with each
other.  Overall, this is no different from the former locked behavior,
but there the visibility of the new rule only depended on the timing
of the locking functions.

A new rule is first added to the segment indices, so the readers may
find the rule in the indices before the rule is visible in the
subtables 'rules' map.  This may result in us losing the opportunity
to quit lookups earlier, resulting in sub-optimal wildcarding.  This
will be fixed by forthcoming revalidation always scheduled after flow
table changes.

Similar behavior may happen due to us removing the overlapping rule
(if any) from the indices only after the corresponding new rule has
been added.

The subtable's max priority is updated only after a rule is inserted
to the maps, so the concurrent readers may not see the rule, as the
updated priority ordered subtable list will only be visible after the
subtable's max priority is updated.

Similarly, the classifier's partitions are updated by the caller after
the rule is inserted to the maps, so the readers may keep skipping the
subtable until they see the updated partitions.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								#include "fat-rwlock.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "flow.h"
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								#include "cmap.h"
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								#include "latch.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "list.h"
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								#include "meta-flow.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "netdev.h"
-												netdev-dpdk: Use multiple core for dpdk IO.

DPDK needs _lcore_id to be set in order to use multiple cores.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 22:07:44 -07:00
+								#include "netdev-dpdk.h"
-												netdev: New function netdev_get_dpif_port().

In future patches, a netdev's datapath port name may not
necessarily be the same as its device name. This patch prepares for
this by making the distinction in the netdev and dpif layers.

Signed-off-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-12-16 17:08:50 -08:00
+								#include "netdev-vport.h"
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								#include "netlink.h"
-												odp-execute: New module for executing datapath actions.

This moves generic action execution code out of lib/dpif-netdev.c
and into a new file, lib/odp-execute.c.

This is in preparation for using odp_execute_actions()
in lib/odp-util.c to handle recirculation.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-29 15:06:38 +09:00
+								#include "odp-execute.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "odp-util.h"
 								#include "ofp-print.h"
 								#include "ofpbuf.h"
-												dpif-netdev: Create multiple tx/rx queues when adding dpdk interface.

Before this commit, ovs creates one tx and one rx queue for
each dpdk interface and uses only one poll thread for handling
I/O of all dpdk interfaces.  An upcoming patch will allow multiple
poll threads to be created.  As a preparation, this commit changes
the dpif-netdev to create multiple tx/rx queues when the dpdk
interface is added.

Specifically, the number of rx queues will still be one per-dpdk
interface for this commit.  But upcoming work will allow the user
to create multiple rx queues.  The number of tx queues will be the
number of cpu cores on the machine.  Although not all the tx queues
will be used, each poll thread will have its own queue for
transmission on the dpdk interface.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-06-17 10:52:20 -07:00
+								#include "ovs-numa.h"
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								#include "ovs-rcu.h"
-												dpif-netdev: use dpif_packet structure for packets

This commit introduces a new data structure used for receiving packets from
netdevs and passing them to dpifs.
The purpose of this change is to allow storing some private data for each
packet. The subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:57 -07:00
+								#include "packet-dpif.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "packets.h"
 								#include "poll-loop.h"
-												dpif-netdev: Implement OVS_ACTION_ATTR_SAMPLE action.

OVS_ACTION_ATTR_SAMPLE has never been implemented in dpif-netdev.  This
commit implements it and adds a cast to enum ovs_action_type in the switch
statement that checks the action type, so that GCC complains if we forget
to add a case for a new action type.

I had to assign the return value of nl_attr_type() to a temporary variable,
because "switch ((enum ovs_action_type) nl_attr_type(a))" provoked a GCC
warning that I've never seen before:

../lib/dpif-netdev.c:1260: warning: cast from function call of type 'int'
     to non-matching type 'enum ovs_action_type'

											
										
										
											2011-10-11 11:07:14 -07:00
+								#include "random.h"
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								#include "seq.h"
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								#include "shash.h"
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								#include "sset.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "timeval.h"
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								#include "unixctl.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "util.h"
 								#include "vlog.h"
-												vlog: Introduce VLOG_DEFINE_THIS_MODULE for declaring vlog module in use.

Adding a macro to define the vlog module in use adds a level of
indirection, which makes it easier to change how the vlog module must be
defined.  A followup commit needs to do that, so getting these widespread
changes out of the way first should make that commit easier to review.

											
										
										
											2010-07-16 11:02:49 -07:00
-												vlog: Make client supply semicolon for VLOG_DEFINE_THIS_MODULE.

It's kind of odd for VLOG_DEFINE_THIS_MODULE to supply its own semicolon,
so this commit switches to the more common form.

											
										
										
											2010-10-19 14:47:01 -07:00
+								VLOG_DEFINE_THIS_MODULE(dpif_netdev);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								/* By default, choose a priority in the middle. */
 								#define NETDEV_RULE_PRIORITY 0x8000
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								#define FLOW_DUMP_MAX_BATCH 50
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								/* Use per thread recirc_depth to prevent recirculation loop. */
 								#define MAX_RECIRC_DEPTH 5
 								DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  So, to minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices, and the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* Configuration parameters. */
 								enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Protects against changes to 'dp_netdevs'. */
 								static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
 								/* Contains all 'struct dp_netdev's. */
 								static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
 								    = SHASH_INITIALIZER(&dp_netdevs);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								/* Stores a miniflow with inline values */
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
 								/* There are fields in the flow structure that we never use. Therefore we can
 								 * save a few words of memory */
 								#define NETDEV_KEY_BUF_SIZE_U32 (FLOW_U32S                 \
 								                                 - MINI_N_INLINE           \
 								                                 - FLOW_U32_SIZE(regs)     \
 								                                 - FLOW_U32_SIZE(metadata) \
 								                                )
 								/* A miniflow ('flow') immediately followed by NETDEV_KEY_BUF_SIZE_U32
 								 * 32-bit words of buffer space ('buf').
 								 * NOTE(review): 'buf' presumably holds the miniflow values that do not
 								 * fit in the miniflow's own inline storage -- confirm against the
 								 * definition of struct miniflow. */
 								struct netdev_flow_key {
 								    struct miniflow flow;
 								    uint32_t buf[NETDEV_KEY_BUF_SIZE_U32];
 								};
 								/* Exact match cache for frequently used flows
 								 *
 								 * The cache uses a 32-bit hash of the packet (which can be the RSS hash) to
 								 * search its entries for a miniflow that matches exactly the miniflow of the
 								 * packet. It stores the 'cls_rule'(rule) that matches the miniflow.
 								 *
 								 * A cache entry holds a reference to its 'dp_netdev_flow'.
 								 *
 								 * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS different
 								 * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values (each of
 								 * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown away). Each
 								 * value is the index of a cache entry where the miniflow could be.
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Each pmd_thread has its own private exact match cache.
 								 * If dp_netdev_input is not called from a pmd thread, a mutex is used.
 								 */
 								#define EM_FLOW_HASH_SHIFT 10
 								#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)
 								#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
 								#define EM_FLOW_HASH_SEGS 2
 								struct emc_entry {
 								    uint32_t hash;
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								    uint32_t mf_len;
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    struct netdev_flow_key mf;
 								    struct dp_netdev_flow *flow;
 								};
 								/* An exact match cache: a fixed-size table of EM_FLOW_HASH_ENTRIES
 								 * 'struct emc_entry' slots. */
 								struct emc_cache {
 								    struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
 								};
 								/* Iterate in the exact match cache through every entry that might contain a
 								 * miniflow with hash 'HASH'.
 								 *
 								 * EMC is a pointer to a 'struct emc_cache'.  On each iteration CURRENT_ENTRY
 								 * is set to the entry indexed by the low EM_FLOW_HASH_SHIFT bits of the
 								 * remaining hash, which is then shifted right by EM_FLOW_HASH_SHIFT bits, so
 								 * the loop body runs EM_FLOW_HASH_SEGS times.  HASH is evaluated only once.
 								 *
 								 * CURRENT_ENTRY is assigned as part of the loop condition, before the
 								 * iteration count is tested, so it is assigned one more time when the loop
 								 * terminates and must not be relied upon after the loop. */
 								#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH)                 \
 								    for (uint32_t i__ = 0, srch_hash__ = (HASH);                             \
 								         (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \
 								         i__ < EM_FLOW_HASH_SEGS;                                            \
 								         i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Datapath based on the network device interface from netdev.h.
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Some members, marked 'const', are immutable.  Accessing other members
 								 * requires synchronization, as noted in more detail below.
 								 *
 								 * Acquisition order is, from outermost to innermost:
 								 *
 								 *    dp_netdev_mutex (global)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								 *    port_mutex
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								 *    flow_mutex
 								 */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								struct dp_netdev {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    const struct dpif_class *const class;
 								    const char *const name;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    struct dpif *dpif;
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    struct ovs_refcount ref_cnt;
 								    atomic_flag destroyed;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Flows.
 								     *
-												lib/classifier: Lockless lookups.

Now that all the relevant classifier structures use RCU and internal
mutual exclusion for modifications, we can remove the fat-rwlock and
thus make the classifier lookups lockless.

As the readers are operating concurrently with the writers, a
concurrent reader may or may not see a new rule being added by a
writer, depending on how the concurrent events overlap with each
other.  Overall, this is no different from the former locked behavior,
but there the visibility of the new rule only depended on the timing
of the locking functions.

A new rule is first added to the segment indices, so the readers may
find the rule in the indices before the rule is visible in the
subtables 'rules' map.  This may result in us losing the opportunity
to quit lookups earlier, resulting in sub-optimal wildcarding.  This
will be fixed by forthcoming revalidation always scheduled after flow
table changes.

Similar behavior may happen due to us removing the overlapping rule
(if any) from the indices only after the corresponding new rule has
been added.

The subtable's max priority is updated only after a rule is inserted
to the maps, so the concurrent readers may not see the rule, as the
updated priority ordered subtable list will only be visible after the
subtable's max priority is updated.

Similarly, the classifier's partitions are updated by the caller after
the rule is inserted to the maps, so the readers may keep skipping the
subtable until they see the updated partitions.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								     * Writers of 'flow_table' must take the 'flow_mutex'.  Corresponding
 								     * changes to 'cls' must be made while still holding the 'flow_mutex'.
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								     */
 								    struct ovs_mutex flow_mutex;
-												lib/classifier: Lockless lookups.

Now that all the relevant classifier structures use RCU and internal
mutual exclusion for modifications, we can remove the fat-rwlock and
thus make the classifier lookups lockless.

As the readers are operating concurrently with the writers, a
concurrent reader may or may not see a new rule being added by a
writer, depending on how the concurrent events overlap with each
other.  Overall, this is no different from the former locked behavior,
but there the visibility of the new rule only depended on the timing
of the locking functions.

A new rule is first added to the segment indices, so the readers may
find the rule in the indices before the rule is visible in the
subtables 'rules' map.  This may result in us losing the opportunity
to quit lookups earlier, resulting in sub-optimal wildcarding.  This
will be fixed by forthcoming revalidation always scheduled after flow
table changes.

Similar behavior may happen due to us removing the overlapping rule
(if any) from the indices only after the corresponding new rule has
been added.

The subtable's max priority is updated only after a rule is inserted
to the maps, so the concurrent readers may not see the rule, as the
updated priority ordered subtable list will only be visible after the
subtable's max priority is updated.

Similarly, the classifier's partitions are updated by the caller after
the rule is inserted to the maps, so the readers may keep skipping the
subtable until they see the updated partitions.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-07-11 02:29:08 -07:00
+								    struct classifier cls;
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    struct cmap flow_table OVS_GUARDED; /* Flow table. */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								    /* Statistics.
 								     *
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								     * ovsthread_stats is internally synchronized. */
 								    struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Ports.
 								     *
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								     * Protected by RCU.  Take the mutex to add or remove ports. */
 								    struct ovs_mutex port_mutex;
 								    struct cmap ports;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    struct seq *port_seq;       /* Incremented whenever a port changes. */
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    /* Protects access to ofproto-dpif-upcall interface during revalidator
 								     * thread synchronization. */
 								    struct fat_rwlock upcall_rwlock;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    upcall_callback *upcall_cb;  /* Callback function for executing upcalls. */
 								    void *upcall_aux;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    /* Stores all 'struct dp_netdev_pmd_thread's. */
 								    struct cmap poll_threads;
 								    /* Protects the access of the 'struct dp_netdev_pmd_thread'
 								     * instance for non-pmd thread. */
 								    struct ovs_mutex non_pmd_mutex;
 								    /* Each pmd thread will store its pointer to
 								     * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */
 								    ovsthread_key_t per_pmd_key;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								                                                    odp_port_t);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								enum dp_stat_type {             /* Indexes the 'n' array of struct dp_netdev_stats. */
 								    DP_STAT_HIT,                /* Packets that matched in the flow table. */
 								    DP_STAT_MISS,               /* Packets that did not match. */
 								    DP_STAT_LOST,               /* Packets not passed up to the client. */
 								    DP_N_STATS                  /* Count of the stat types above; sizes that array. */
 								};
 								/* A set of packet counters, one per 'enum dp_stat_type' value.
 								 *
 								 * Contained by struct dp_netdev's 'stats' member.  */
 								struct dp_netdev_stats {
 								    struct ovs_mutex mutex;          /* Protects 'n'. */
 								    /* Indexed by DP_STAT_*, protected by 'mutex'. */
 								    unsigned long long int n[DP_N_STATS] OVS_GUARDED;
 								};
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* A port in a netdev-based datapath. */
 								struct dp_netdev_port {
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    struct cmap_node node;      /* Node in dp_netdev's 'ports'. */
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    odp_port_t port_no;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct netdev *netdev;
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    struct netdev_saved_flags *sf;
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    struct netdev_rxq **rxq;
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								    struct ovs_refcount ref_cnt;
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    char *type;                 /* Port type as requested by user. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* A flow in dp_netdev's 'flow_table'.
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Except near the beginning or ending of its lifespan, flow 'flow' belongs to
 								 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 								 *
 								 * Motivation
 								 * ----------
 								 *
 								 * The thread safety rules described here for "struct dp_netdev_flow" are
 								 * motivated by two goals:
 								 *
 								 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 								 *      reading bad data due to changes by some thread concurrently modifying
 								 *      those members.
 								 *
 								 *    - Prevent two threads making changes to members of a given "struct
 								 *      dp_netdev_flow" from interfering with each other.
 								 *
 								 *
 								 * Rules
 								 * -----
 								 *
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								 * A flow 'flow' may be accessed without a risk of being freed during an RCU
 								 * grace period.  Code that needs to hold onto a flow for a while
 								 * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref().
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								 *
 								 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								 * flow from being deleted from 'cls' and it doesn't protect members of 'flow'
 								 * from modification.
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								 *
 								 * Some members, marked 'const', are immutable.  Accessing other members
 								 * requires synchronization, as noted in more detail below.
 								 */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								struct dp_netdev_flow {
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    bool dead;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    /* Packet classification. */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Hash table indexed by unmasked flow. */
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    const struct cmap_node node; /* In owning dp_netdev's 'flow_table'. */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    const struct flow flow;      /* The flow that created this entry. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								    /* Number of references.
 								     * The classifier owns one reference.
 								     * Any thread trying to keep a rule from being freed should hold its own
 								     * reference. */
 								    struct ovs_refcount ref_cnt;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Statistics.
 								     *
 								     * Reading or writing these members requires 'mutex'. */
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Remove unused members

Simplify code and update comments after commit 61e7deb1.
("dpif-netdev: Use RCU to protect data.")

Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-04-15 14:59:30 +09:00
+								    /* Actions. */
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    OVSRCU_TYPE(struct dp_netdev_actions *) actions;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								static void dp_netdev_flow_unref(struct dp_netdev_flow *);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								static bool dp_netdev_flow_ref(struct dp_netdev_flow *);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								/* Usage statistics kept for a flow; every member other than 'mutex' is
 								 * guarded by 'mutex'.
 								 *
 								 * Contained by struct dp_netdev_flow's 'stats' member.  */
 								struct dp_netdev_flow_stats {
 								    struct ovs_mutex mutex;         /* Guards all the other members. */
 								    long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
 								    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
 								    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
 								    uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
 								};
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								/* A set of datapath actions within a "struct dp_netdev_flow".
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
-												dpif-netdev: Remove unused members

Simplify code and update comments after commit 61e7deb1.
("dpif-netdev: Use RCU to protect data.")

Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-04-15 14:59:30 +09:00
+								 * A struct dp_netdev_actions 'actions' is protected with RCU. */
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								struct dp_netdev_actions {      /* Pointed to by struct dp_netdev_flow's 'actions'. */
 								    /* These members are immutable: they do not change during the struct's
 								     * lifetime.  */
 								    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
 								    unsigned int size;          /* Size of 'actions', in bytes. */
 								};
 								struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
 								                                                   size_t);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								struct dp_netdev_actions *dp_netdev_flow_get_actions(
 								    const struct dp_netdev_flow *);
 								static void dp_netdev_actions_free(struct dp_netdev_actions *);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								/* PMD: Poll mode driver.  A PMD accesses devices via polling to eliminate
 								 * the performance overhead of interrupt processing.  Therefore netdev
 								 * cannot implement rx-wait for these devices.  dpif-netdev needs to poll
 								 * these devices to check their receive buffers.  A pmd thread polls the
 								 * devices assigned to it.
 								 *
 								 * DPDK uses PMDs to access NICs.
 								 *
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								 * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for
 								 * I/O of all non-pmd threads.  There will be no actual thread created
 								 * for the instance.
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices, and the rest of the devices, which need system calls
to do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								 **/
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								struct dp_netdev_pmd_thread {
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    struct dp_netdev *dp;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct cmap_node node;          /* In 'dp->poll_threads'. */
 								    /* Per thread exact-match cache.  Note, the instance for cpu core
 								     * NON_PMD_CORE_ID can be accessed by multiple threads, and thus
 								     * needs to be protected (e.g. by 'dp_netdev_mutex').  All other
 								     * instances will only be accessed by their own pmd thread. */
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    struct emc_cache flow_cache;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct latch exit_latch;        /* For terminating the pmd thread. */
 								    atomic_uint change_seq;         /* For reloading pmd ports. */
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    pthread_t thread;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    int index;                      /* Idx of this pmd thread among pmd
 								                                     * threads on same numa node. */
 								    int core_id;                    /* CPU core id of this pmd thread. */
 								    int numa_id;                    /* numa node id of this pmd thread. */
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								};
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
+								#define PMD_INITIAL_SEQ 1
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* Interface to netdev-based datapath. */
 								struct dpif_netdev {
 								    struct dpif dpif;
 								    struct dp_netdev *dp;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    uint64_t last_port_seq;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								                              struct dp_netdev_port **portp);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static int get_port_by_name(struct dp_netdev *dp, const char *devname,
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								                            struct dp_netdev_port **portp);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static void dp_netdev_free(struct dp_netdev *)
 								    OVS_REQUIRES(dp_netdev_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void dp_netdev_flow_flush(struct dp_netdev *);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static int do_add_port(struct dp_netdev *dp, const char *devname,
 								                       const char *type, odp_port_t port_no)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex);
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								static int dpif_netdev_open(const struct dpif_class *, const char *name,
 								                            bool create, struct dpif **);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								                                      struct dpif_packet **, int c,
 								                                      bool may_steal, struct pkt_metadata *,
-												datapath: Refactor actions in terms of match fields.

Almost all current actions can be expressed in the form of
push/pop/set <field>, where field is one of the match fields. We can
create three base actions and take a field. This has both a nice
symmetry and avoids inconsistencies where we can match on the vlan
TPID but not set it.
Following patch converts all actions to this new format.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

Bug #7115

											
										
										
											2011-10-21 14:38:54 -07:00
+								                                      const struct nlattr *actions,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices, and the rest of the devices, which need system calls
to do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								                                      size_t actions_len);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								static void dp_netdev_input(struct dp_netdev_pmd_thread *,
-												lib/dpif-netdev: Make emc_mutex recursive.

dpif_netdev_execute may be called while doing upcall processing.
Since the context of the input port is not tracked up to this point, we
use the shared dp->emc_cache for packet execution, where the emc_cache
is needed for recirculation.

While recursive mutexes can make thread safety analysis hard, for now
we change emc_mutex to be recursive.  Forthcoming new unit tests will
fail with the current non-recursive mutex.  Later improvements may
remove the need for this recursion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Daniele Di Proietto <ddiproietto@vmware.com>
											
										
										
											2014-09-08 15:33:00 -07:00
+								                            struct dpif_packet **, int cnt,
 								                            struct pkt_metadata *);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								static void dp_netdev_disable_upcall(struct dp_netdev *);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
 								                                    struct dp_netdev *dp, int index,
 								                                    int core_id, int numa_id);
 								static struct dp_netdev_pmd_thread *dp_netdev_get_nonpmd(struct dp_netdev *dp);
 								static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
 								static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 								static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								static void emc_clear_entry(struct emc_entry *ce);
 								static void
 								emc_cache_init(struct emc_cache *flow_cache)
 								{
 								    int i;
 								    for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
 								        flow_cache->entries[i].flow = NULL;
 								        flow_cache->entries[i].hash = 0;
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which is slow
if the popcnt instruction is not available. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								        flow_cache->entries[i].mf_len = 0;
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        miniflow_initialize(&flow_cache->entries[i].mf.flow,
 								                            flow_cache->entries[i].mf.buf);
 								    }
 								}
 								static void
 								emc_cache_uninit(struct emc_cache *flow_cache)
 								{
 								    int i;
 								    for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
 								        emc_clear_entry(&flow_cache->entries[i]);
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static struct dpif_netdev *
 								dpif_netdev_cast(const struct dpif *dpif)
 								{
-												Replace most uses of assert by ovs_assert.

This is a straight search-and-replace, except that I also removed #include
<assert.h> from each file where there were no assert calls left.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-11-06 13:14:55 -08:00
+								    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
 								}
 								static struct dp_netdev *
 								get_dp_netdev(const struct dpif *dpif)
 								{
 								    return dpif_netdev_cast(dpif)->dp;
 								}
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								static int
-												dpif-netdev: enumerate dpif belonging to the right class

Since dpif_netdev_enumerate() is used for "netdev" and "dummy" class, it
incorrectly lists dpif-netdevs as "dummy" and vice versa.
This patch addresses the issue by changing the dpif-provider interface: a
dpif_class parameter is passed to the 'enumerate' call to match the right class.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-12 16:37:33 -07:00
+								dpif_netdev_enumerate(struct sset *all_dps,
 								                      const struct dpif_class *dpif_class)
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								{
 								    struct shash_node *node;
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    SHASH_FOR_EACH(node, &dp_netdevs) {
-												dpif-netdev: enumerate dpif belonging to the right class

Since dpif_netdev_enumerate() is used for "netdev" and "dummy" class, it
incorrectly lists dpif-netdevs as "dummy" and vice versa.
This patch addresses the issue by changing the dpif-provider interface: a
dpif_class parameter is passed to the 'enumerate' call to match the right class.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-12 16:37:33 -07:00
+								        struct dp_netdev *dp = node->data;
 								        if (dpif_class != dp->class) {
 								            /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs.
 								             * If the class doesn't match, skip this dpif. */
 								             continue;
 								        }
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								        sset_add(all_dps, node->name);
 								    }
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&dp_netdev_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    return 0;
 								}
-												dpif-netdev: Allow stub interfaces on the dummy datapath.

Future patches will need to add netdevs to the dummy datapath which
can't actually send or receive packets.

Signed-off-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-01-08 14:37:23 -08:00
+								static bool
 								dpif_netdev_class_is_dummy(const struct dpif_class *class)
 								{
 								    return class != &dpif_netdev_class;
 								}
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than the one it is configured as.  For example,
an "internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
/* Returns the netdev type that a port configured with 'type' should actually
 * be opened as on a datapath of the given 'class'.
 *
 * Any type other than "internal" is returned unchanged.  An "internal" port
 * is opened as "dummy" when 'class' is a dummy class and as "tap"
 * otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }

    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static struct dpif *
 								create_dpif_netdev(struct dp_netdev *dp)
 								{
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    uint16_t netflow_id = hash_string(dp->name, 0);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dpif_netdev *dpif;
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    ovs_refcount_ref(&dp->ref_cnt);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    dpif = xmalloc(sizeof *dpif);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif->dp = dp;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    dpif->last_port_seq = seq_read(dp->port_seq);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    return &dpif->dpif;
 								}
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								/* Choose an unused, non-zero port number and return it on success.
 								 * Return ODPP_NONE on failure. */
 								static odp_port_t
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								choose_port(struct dp_netdev *dp, const char *name)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex)
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								{
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    uint32_t port_no;
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
 								    if (dp->class != &dpif_netdev_class) {
 								        const char *p;
 								        int start_no = 0;
 								        /* If the port name begins with "br", start the number search at
 								         * 100 to make writing tests easier. */
 								        if (!strncmp(name, "br", 2)) {
 								            start_no = 100;
 								        }
 								        /* If the port name contains a number, try to assign that port number.
 								         * This can make writing unit tests easier because port numbers are
 								         * predictable. */
 								        for (p = name; *p != '\0'; p++) {
 								            if (isdigit((unsigned char) *p)) {
 								                port_no = start_no + strtol(p, NULL, 10);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								                if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
 								                    && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                    return u32_to_odp(port_no);
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								                }
 								                break;
 								            }
 								        }
 								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
 								        if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								            return u32_to_odp(port_no);
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								        }
 								    }
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    return ODPP_NONE;
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								create_dp_netdev(const char *name, const struct dpif_class *class,
 								                 struct dp_netdev **dpp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp_netdev_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *non_pmd;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    int error;
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    dp = xzalloc(sizeof *dp);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    shash_add(&dp_netdevs, name, dp);
 								    *CONST_CAST(const struct dpif_class **, &dp->class) = class;
 								    *CONST_CAST(const char **, &dp->name) = xstrdup(name);
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    ovs_refcount_init(&dp->ref_cnt);
-												dpif-netdev: init atomic flag dp->destroyed

It is better to explicitly initialize the dp->destroy than to rely
on xzalloc().

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-17 22:10:53 -07:00
+								    atomic_flag_clear(&dp->destroyed);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								    ovs_mutex_init(&dp->flow_mutex);
 								    classifier_init(&dp->cls, NULL);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    cmap_init(&dp->flow_table);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    ovsthread_stats_init(&dp->stats);
-												dpif-netdev: Use new "ovsthread_counter" to track dp statistics.

ovsthread_counter is an abstract interface that could be implemented
different ways.  The initial implementation is simple but less than
optimally efficient.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-23 14:04:13 -08:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_init(&dp->port_mutex);
 								    cmap_init(&dp->ports);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    dp->port_seq = seq_create();
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    fat_rwlock_init(&dp->upcall_rwlock);
 								    /* Disable upcalls by default. */
 								    dp_netdev_disable_upcall(dp);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    dp->upcall_aux = NULL;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    dp->upcall_cb = NULL;
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    cmap_init(&dp->poll_threads);
 								    ovs_mutex_init_recursive(&dp->non_pmd_mutex);
 								    ovsthread_key_create(&dp->per_pmd_key, NULL);
 								    /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */
 								    ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID);
 								    non_pmd = xzalloc(sizeof *non_pmd);
 								    dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
 								                            OVS_NUMA_UNSPEC);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    error = do_add_port(dp, name, "internal", ODPP_LOCAL);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
 								        dp_netdev_free(dp);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								        return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    *dpp = dp;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								static int
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								dpif_netdev_open(const struct dpif_class *class, const char *name,
-												dpif: Make dpif_class 'open' function take class instead of type name.

This makes it easier for dpif_provider implementations to share code but
distinguish the class actually in use, because comparing a pointer is
easier than comparing a string.

											
										
										
											2010-11-18 10:06:41 -08:00
+								                 bool create, struct dpif **dpifp)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    struct dp_netdev *dp;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    dp = shash_find_data(&dp_netdevs, name);
 								    if (!dp) {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = (dp->class != class ? EINVAL
 								                 : create ? EEXIST
 								                 : 0);
 								    }
 								    if (!error) {
 								        *dpifp = create_dpif_netdev(dp);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								        dp->dpif = *dpifp;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&dp_netdev_mutex);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 								 * through the 'dp_netdevs' shash while freeing 'dp'. */
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								static void
 								dp_netdev_free(struct dp_netdev *dp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp_netdev_mutex)
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								{
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    struct dp_netdev_port *port;
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    struct dp_netdev_stats *bucket;
 								    int i;
-												dpif-netdev: Avoid pointlessly maintaining a port count.

'n_ports' was only used for testing for nonzero, and we can rewrite the
code that does that to more straightforwardly use LIST_FOR_EACH_SAFE.

											
										
										
											2011-08-10 12:40:10 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    shash_find_and_delete(&dp_netdevs, dp->name);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(), so a
  per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    dp_netdev_destroy_all_pmds(dp);
 								    ovs_mutex_destroy(&dp->non_pmd_mutex);
 								    ovsthread_key_delete(dp->per_pmd_key);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    dp_netdev_flow_flush(dp);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
-												lib/cmap: Simplify iteration with C99 loop declaration.

This further eases porting existing hmap code to use cmap instead.

The iterator variants taking an explicit cursor are retained (renamed)
as they are needed when iteration is to be continued from the last
iterated node.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-11 11:07:43 -07:00
+								    CMAP_FOR_EACH (port, node, &dp->ports) {
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								        do_del_port(dp, port);
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
 								        ovs_mutex_destroy(&bucket->mutex);
 								        free_cacheline(bucket);
 								    }
 								    ovsthread_stats_destroy(&dp->stats);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    classifier_destroy(&dp->cls);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    cmap_destroy(&dp->flow_table);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_destroy(&dp->flow_mutex);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_destroy(dp->port_seq);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    cmap_destroy(&dp->ports);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    fat_rwlock_destroy(&dp->upcall_rwlock);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    free(CONST_CAST(char *, dp->name));
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    free(dp);
 								}
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static void
 								dp_netdev_unref(struct dp_netdev *dp)
 								{
 								    if (dp) {
 								        /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
 								         * get a new reference to 'dp' through the 'dp_netdevs' shash. */
 								        ovs_mutex_lock(&dp_netdev_mutex);
-												Use ovs_refcount_unref_relaxed.

After a quick analysis, in most cases the access to refcounted objects
is clearly protected either with an explicit lock/mutex, or RCU. There
are only a few places where I left a call to ovs_refcount_unref().
Upon closer analysis it may well be that those could also use the
relaxed form.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-07 13:18:46 -07:00
+								        if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								            dp_netdev_free(dp);
 								        }
 								        ovs_mutex_unlock(&dp_netdev_mutex);
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
 								dpif_netdev_close(struct dpif *dpif)
 								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    dp_netdev_unref(dp);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    free(dpif);
 								}
 								static int
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								dpif_netdev_destroy(struct dpif *dpif)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    if (!atomic_flag_test_and_set(&dp->destroyed)) {
-												Use ovs_refcount_unref_relaxed.

After a quick analysis, in most cases the access to refcounted objects
is clearly protected either with an explicit lock/mutex, or RCU. There
are only a few places where I left a call to ovs_refcount_unref().
Upon closer analysis it may well be that those could also use the
relaxed form.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-07 13:18:46 -07:00
+								        if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) {
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								            /* Can't happen: 'dpif' still owns a reference to 'dp'. */
 								            OVS_NOT_REACHED();
 								        }
 								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								static int
-												dpif: Avoid use of  "struct ovs_dp_stats" in platform-independent modules.

Over time we wish to reduce the number of datapath-protocol.h definitions
used directly outside of Linux-specific code.  This commit removes use of
"struct ovs_dp_stats" from platform-independent code.

Bug #7559.

											
										
										
											2011-10-05 11:18:13 -07:00
+								dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    struct dp_netdev_stats *bucket;
 								    size_t i;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    stats->n_flows = cmap_count(&dp->flow_table);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    stats->n_hit = stats->n_missed = stats->n_lost = 0;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        stats->n_hit += bucket->n[DP_STAT_HIT];
 								        stats->n_missed += bucket->n[DP_STAT_MISS];
 								        stats->n_lost += bucket->n[DP_STAT_LOST];
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												dpif-linux: fix the size of n_masks

The command ovs-dpctl can wrongly output the masks even if the
datapath does not implement mega flows. In this case the output
will be similar to the following:

system@ovs-system:
	lookups: hit:14 missed:41 lost:0
	flows: 0
	masks: hit:18446744073709551615 total:4294967295
		hit/pkt:335395346794719104.00
	port 0: ovs-system (internal)
	port 1: gre_system (gre: df_default=false, ttl=0)
	port 2: ots-br0 (internal)
	port 3: int0 (internal)
	port 4: vnet0
	port 5: vnet1

The problem depends on the fact that n_masks stats is stored as a
uint32 in the struct ovs_dp_megaflow_stats and as a uint64 in the
struct dpif_dp_stats. UINT32_MAX instead of UINT64_MAX should be
used to detect if the datapath supports megaflows or not.

Signed-off-by: Francesco Fusco <ffusco@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-17 20:18:18 +01:00
+								    stats->n_masks = UINT32_MAX;
-												dpif-linux: collect and display mega flow mask stats

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-10-21 14:37:34 -07:00
+								    stats->n_mask_hit = UINT64_MAX;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive from
these devices, we need to poll them in a busy loop.  To minimize system
call overhead, this patch uses dpif-thread exclusively for PMD devices;
the remaining devices, which need system calls to do I/O, are moved to
dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								static void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(), so a
  per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive from
these devices, we need to poll them in a busy loop.  To minimize system
call overhead, this patch uses dpif-thread exclusively for PMD devices;
the remaining devices, which need system calls to do I/O, are moved to
dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								{
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(), so a
  per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    int old_seq;
 								    atomic_add_relaxed(&pmd->change_seq, 1, &old_seq);
 								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive from
these devices, we need to poll them in a busy loop.  To minimize system
call overhead, this patch uses dpif-thread exclusively for PMD devices;
the remaining devices, which need system calls to do I/O, are moved to
dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(), so a
  per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								/* Causes all pmd threads to reload their tx/rx devices.
 								 * Must be called after adding/removing ports. */
 								static void
 								dp_netdev_reload_pmds(struct dp_netdev *dp)
 								{
 								    struct dp_netdev_pmd_thread *pmd;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive from
these devices, we need to poll them in a busy loop.  To minimize system
call overhead, this patch uses dpif-thread exclusively for PMD devices;
the remaining devices, which need system calls to do I/O, are moved to
dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(), so a
  per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
 								        dp_netdev_reload_pmd__(pmd);
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
+								    }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive from
these devices, we need to poll them in a busy loop.  To minimize system
call overhead, this patch uses dpif-thread exclusively for PMD devices;
the remaining devices, which need system calls to do I/O, are moved to
dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								}
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								static uint32_t
 								hash_port_no(odp_port_t port_no)
 								{
 								    /* Hash of the raw port number with a zero basis; this is the
 								     * hash under which ports are inserted into 'dp->ports'. */
 								    uint32_t raw_port = odp_to_u32(port_no);

 								    return hash_int(raw_port, 0);
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								            odp_port_t port_no)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    struct netdev_saved_flags *sf;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dp_netdev_port *port;
 								    struct netdev *netdev;
-												dpif-netdev: Do not allow adding loopback devices

Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-07 12:35:15 +03:00
+								    enum netdev_flags flags;
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    const char *open_type;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    int error;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  This
patch adds support for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    int i;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    /* XXX reject devices already in some dp_netdev. */
 								    /* Open and validate network device. */
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								    open_type = dpif_netdev_port_open_type(dp->class, type);
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    error = netdev_open(devname, open_type, &netdev);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
 								        return error;
 								    }
 								    /* XXX reject non-Ethernet devices */
-												dpif-netdev: Do not allow adding loopback devices

Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-07 12:35:15 +03:00
+								    netdev_get_flags(netdev, &flags);
 								    if (flags & NETDEV_LOOPBACK) {
 								        VLOG_ERR("%s: cannot add a loopback device", devname);
 								        netdev_close(netdev);
 								        return EINVAL;
 								    }
-												dpif-netdev: Create multiple tx/rx queues when adding dpdk interface.

Before this commit, ovs creates one tx and one rx queue for
each dpdk interface and uses only one poll thread for handling
I/O of all dpdk interfaces.  An upcoming patch will allow multiple
poll threads to be created.  As preparation, this commit changes
the dpif-netdev to create multiple tx/rx queues when the dpdk
interface is added.

Specifically, the number of rx queues will still be one per-dpdk
interface for this commit.  But upcoming work will allow the user
to create multiple rx queues.  The number of tx queues will be the
number of cpu cores on the machine.  Although not all the tx queues
will be used, each poll thread will have its own queue for
transmission on the dpdk interface.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-06-17 10:52:20 -07:00
+								    if (netdev_is_pmd(netdev)) {
 								        int n_cores = ovs_numa_get_n_cores();
 								        if (n_cores == OVS_CORE_UNSPEC) {
 								            VLOG_ERR("%s, cannot get cpu core info", devname);
 								            /* 'netdev' was opened above and is not yet owned by a
 								             * port, so release it before bailing out. */
 								            netdev_close(netdev);
 								            return ENOENT;
 								        }
 								        /* There can only be ovs_numa_get_n_cores() pmd threads,
 								         * so creates a tx_q for each. */
 								        error = netdev_set_multiq(netdev, n_cores, NR_QUEUE);
 								        if (error) {
 								            VLOG_ERR("%s, cannot set multiq", devname);
 								            netdev_close(netdev);
 								            /* Report the code returned by netdev_set_multiq().
 								             * 'errno' is not known to be set by it and could be 0
 								             * or stale, which would hide the failure. */
 								            return error;
 								        }
 								    }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    port = xzalloc(sizeof *port);
 								    port->port_no = port_no;
 								    port->netdev = netdev;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  This
patch adds support for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    port->type = xstrdup(type);
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  This
patch adds support for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    for (i = 0; i < netdev_n_rxq(netdev); i++) {
 								        error = netdev_rxq_open(netdev, &port->rxq[i], i);
 								        if (error
 								            && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
 								            /* Log the code returned by netdev_rxq_open() rather than
 								             * 'errno', which is not known to be set by it. */
 								            VLOG_ERR("%s: cannot receive packets on this network device (%s)",
 								                     devname, ovs_strerror(error));
 								            /* Close the queues opened by earlier iterations, as the
 								             * netdev_turn_flags_on() failure path below does, so
 								             * they are not leaked. */
 								            while (i-- > 0) {
 								                netdev_rxq_close(port->rxq[i]);
 								            }
 								            netdev_close(netdev);
-												dpif-netdev: Fix leaked port, port->rxq, port->type in error path

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
[blp@nicira.com added free of port->type]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-26 18:36:08 +02:00
+								            free(port->type);
 								            free(port->rxq);
 								            free(port);
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  This
patch adds support for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								            return error;
 								        }
-												netdev: Clean up and refactor packet receive interface.

The Open vSwitch tree only has one user of the ability for a netdev to
receive packets from a network device.  Thus, this commit simplifies the
common-case use of the netdev interface by replacing the "ethertype" option
from "struct netdev_options" by a new netdev_listen() call.

The only user of netdev_listen() wants to receive all packets from a
network device, so this commit also removes the ability to restrict the
received packets to a particular protocol.  (This ability was once used by
the Open vSwitch integrated DHCP client, but that code has been removed.)

This commit also simplifies and improves the implementation of the code
in netdev-linux that started listening to a network device.  Before, I had
not figured out how to avoid receiving all packets on all devices before
binding to a particular device, but I took a closer look at the kernel code
and figured it out.

I've tested that the userspace datapath (dpif-netdev), the only user of
netdev_recv(), still works after this change.

											
										
										
											2011-08-05 14:15:32 -07:00
+								    }
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  This
patch adds support for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        for (i = 0; i < netdev_n_rxq(netdev); i++) {
 								            netdev_rxq_close(port->rxq[i]);
 								        }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        netdev_close(netdev);
-												dpif-netdev: Fix leaked port, port->rxq, port->type in error path

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
[blp@nicira.com added free of port->type]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-26 18:36:08 +02:00
+								        free(port->type);
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								        free(port->rxq);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        free(port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return error;
 								    }
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    port->sf = sf;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								    if (netdev_is_pmd(netdev)) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs will by default create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
 								        dp_netdev_reload_pmds(dp);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
 								    ovs_refcount_init(&port->ref_cnt);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    return 0;
 								}
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								static int
 								dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                     odp_port_t *port_nop)
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
 								    const char *dpif_port;
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    odp_port_t port_no;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								    dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    if (*port_nop != ODPP_NONE) {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        port_no = *port_nop;
 								        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
-												dpif: Allow the port number to be requested when adding an interface.

The datapath allows requesting a specific port number for a port, but
the dpif interface didn't expose it.  This commit adds that support.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-07-27 23:58:24 -07:00
+								    } else {
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								        port_no = choose_port(dp, dpif_port);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = port_no == ODPP_NONE ? EFBIG : 0;
-												dpif: Allow the port number to be requested when adding an interface.

The datapath allows requesting a specific port number for a port, but
the dpif interface didn't expose it.  This commit adds that support.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-07-27 23:58:24 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    if (!error) {
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								        *port_nop = port_no;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								    if (port_no == ODPP_LOCAL) {
 								        error = EINVAL;
 								    } else {
 								        struct dp_netdev_port *port;
 								        error = get_port_by_number(dp, port_no, &port);
 								        if (!error) {
 								            do_del_port(dp, port);
 								        }
 								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static bool
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								is_valid_port_number(odp_port_t port_no)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    return port_no != ODPP_NONE;
 								}
 								static struct dp_netdev_port *
 								dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
 								{
 								    struct dp_netdev_port *port;
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        if (port->port_no == port_no) {
 								            return port;
 								        }
 								    }
 								    return NULL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
 								get_port_by_number(struct dp_netdev *dp,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                   odp_port_t port_no, struct dp_netdev_port **portp)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    if (!is_valid_port_number(port_no)) {
 								        *portp = NULL;
 								        return EINVAL;
 								    } else {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        *portp = dp_netdev_lookup_port(dp, port_no);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return *portp ? 0 : ENOENT;
 								    }
 								}
-												dpif-netdev: Add ref-counting for port.

The DPDK poll-mode thread needs to keep a reference to the dpif port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								static void
 								port_ref(struct dp_netdev_port *port)
 								{
 								    /* Takes one additional reference on 'port' by bumping its 'ref_cnt'.
 								     * A null 'port' is accepted and silently ignored. */
 								    if (port) {
 								        ovs_refcount_ref(&port->ref_cnt);
 								    }
 								}
-												dpif-netdev: Introduce port_try_ref() to prevent a race.

When a pmd thread iterates through all ports for queue loading,
the main thread may unreference and 'rcu-free' a port before the
pmd thread takes a new reference to it.  This could cause the pmd
thread to fail to take the reference and access freed memory later.

This commit fixes this race by introducing port_try_ref()
which uses ovs_refcount_try_ref_rcu().  And the pmd thread
will only load the port's queue, if port_try_ref() returns
true.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-21 15:54:07 -07:00
+								static bool
 								port_try_ref(struct dp_netdev_port *port)
 								{
 								    /* Tries to take a reference on 'port' and reports whether that
 								     * succeeded: the result is whatever ovs_refcount_try_ref_rcu()
 								     * returns for the port's 'ref_cnt'.  A null 'port' always yields
 								     * false. */
 								    if (port) {
 								        return ovs_refcount_try_ref_rcu(&port->ref_cnt);
 								    }
 								    return false;
 								}
-												dpif-netdev: Add ref-counting for port.

The DPDK poll-mode thread needs to keep a reference to the dpif port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								static void
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								port_destroy__(struct dp_netdev_port *port)
-												dpif-netdev: Add ref-counting for port.

The DPDK poll-mode thread needs to keep a reference to the dpif port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								{
-												dpif-netdev: Fix another use-after-free in port_unref().

Commit 87400a3d4cc4a (dpif-netdev: Fix use-after-free in port_unref().)
fixed one use-after-free in the common case of port_unref().  However,
there was another, similar case: if port->netdev has no rxqs, then
the netdev_close() causes port->netdev to be destroyed and thus the
following call to netdev_n_rxq() accesses freed memory.  This commit fixes
the problem.

Found by valgrind.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-06-04 15:41:09 -07:00
+								    int n_rxq = netdev_n_rxq(port->netdev);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    int i;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    netdev_close(port->netdev);
 								    netdev_restore_flags(port->sf);
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    for (i = 0; i < n_rxq; i++) {
 								        netdev_rxq_close(port->rxq[i]);
 								    }
 								    free(port->rxq);
 								    free(port->type);
 								    free(port);
 								}
 								static void
 								port_unref(struct dp_netdev_port *port)
 								{
-												Use ovs_refcount_unref_relaxed.

After a quick analysis, in most cases the access to refcounted objects
is clearly protected either with an explicit lock/mutex, or RCU. There
are only a few places where I left a call to ovs_refcount_unref().
Upon closer analysis it may well be that those could also use the
relaxed form.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-07 13:18:46 -07:00
+								    if (port && ovs_refcount_unref_relaxed(&port->ref_cnt) == 1) {
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								        ovsrcu_postpone(port_destroy__, port);
-												dpif-netdev: Add ref-counting for port.

The DPDK poll-mode thread needs to keep a reference to the dpif port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
 								get_port_by_name(struct dp_netdev *dp,
 								                 const char *devname, struct dp_netdev_port **portp)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev_port *port;
-												lib/cmap: Simplify iteration with C99 loop declaration.

This further eases porting existing hmap code to use cmap instead.

The iterator variants taking an explicit cursor are retained (renamed)
as they are needed when iteration is to be continued from the last
iterated node.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-11 11:07:43 -07:00
+								    CMAP_FOR_EACH (port, node, &dp->ports) {
-												dpif-netdev: Don't run port names through netdev_vport_get_dpif_port().

The ports that exist within a dpif have already been translated through
netdev_vport_get_dpif_port(), so there is no value to translating them
again in the interfaces that query or dump ports (and possibly a drawback
if somehow the translation could change).

After this change, dpif-netdev translates port names in just one place,
the port_add path, which makes dpif-netdev act the same way as dpif-linux
in this respect.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-06-06 15:27:15 -07:00
+								        if (!strcmp(netdev_get_name(port->netdev), devname)) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            *portp = port;
 								            return 0;
 								        }
 								    }
 								    return ENOENT;
 								}
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								static int
 								get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id)
 								{
 								    /* Returns the number of entries in 'dp->poll_threads' whose
 								     * 'numa_id' field equals 'numa_id' (0 if none match). */
 								    struct dp_netdev_pmd_thread *pmd;
 								    int n_pmds = 0;
 								    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
 								        if (pmd->numa_id == numa_id) {
 								            n_pmds++;
 								        }
 								    }
 								    return n_pmds;
 								}
 								/* Returns 'true' if 'dp->ports' contains at least one port whose netdev
 								 * is a pmd netdev (per netdev_is_pmd()) and whose numa id (per
 								 * netdev_get_numa_id()) equals 'numa_id'.  Returns 'false' otherwise,
 								 * including when no port is present. */
 								static bool
 								has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id)
 								{
 								    struct dp_netdev_port *port;
 								    CMAP_FOR_EACH (port, node, &dp->ports) {
 								        /* Both conditions must hold for the same port. */
 								        if (netdev_is_pmd(port->netdev)
 								            && netdev_get_numa_id(port->netdev) == numa_id) {
 								            return true;
 								        }
 								    }
 								    return false;
 								}
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								static void
 								do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    OVS_REQUIRES(dp->port_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								    cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    if (netdev_is_pmd(port->netdev)) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        int numa_id = netdev_get_numa_id(port->netdev);
 								        /* If there is no netdev on the numa node, deletes the pmd threads
 								         * for that numa.  Else, just reloads the queues.  */
 								        if (!has_pmd_port_for_numa(dp, numa_id)) {
 								            dp_netdev_del_pmds_on_numa(dp, numa_id);
 								        }
 								        dp_netdev_reload_pmds(dp);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Add ref-counting for port.

The DPDK poll-mode thread needs to keep a reference to the dpif port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								    port_unref(port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								answer_port_query(const struct dp_netdev_port *port,
 								                  struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Don't run port names through netdev_vport_get_dpif_port().

The ports that exist within a dpif have already been translated through
netdev_vport_get_dpif_port(), so there is no value to translating them
again in the interfaces that query or dump ports (and possibly a drawback
if somehow the translation could change).

After this change, dpif-netdev translates port names in just one place,
the port_add path, which makes dpif-netdev act the same way as dpif-linux
in this respect.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-06-06 15:27:15 -07:00
+								    dpif_port->name = xstrdup(netdev_get_name(port->netdev));
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    dpif_port->type = xstrdup(port->type);
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    dpif_port->port_no = port->port_no;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                                 struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    struct dp_netdev_port *port;
 								    int error;
 								    error = get_port_by_number(dp, port_no, &port);
-												dpif: Add new dpif_port_exists() function.

Provide the ability to determine whether a port exists in a datapath
without having to deal with a "dpif_port" structure as with
dpif_port_query_by_name().  A future patch will use this function.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-17 23:11:53 -07:00
+								    if (!error && dpif_port) {
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								        answer_port_query(port, dpif_port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return error;
 								}
 								static int
 								dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                               struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    struct dp_netdev_port *port;
 								    int error;
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    error = get_port_by_name(dp, devname, &port);
-												dpif: Add new dpif_port_exists() function.

Provide the ability to determine whether a port exists in a datapath
without having to deal with a "dpif_port" structure as with
dpif_port_query_by_name().  A future patch will use this function.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-17 23:11:53 -07:00
+								    if (!error && dpif_port) {
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								        answer_port_query(port, dpif_port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return error;
 								}
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								static void
 								dp_netdev_flow_free(struct dp_netdev_flow *flow)
 								{
 								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
 								        ovs_mutex_destroy(&bucket->mutex);
 								        free_cacheline(bucket);
 								    }
 								    ovsthread_stats_destroy(&flow->stats);
 								    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
 								    dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
 								    free(flow);
 								}
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								static void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
 								{
 								    if (ovs_refcount_unref_relaxed(&flow->ref_cnt) == 1) {
 								        ovsrcu_postpone(dp_netdev_flow_free, flow);
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
 								    OVS_REQUIRES(dp->flow_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    classifier_remove(&dp->cls, cr);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    cmap_remove(&dp->flow_table, node, flow_hash(&flow->flow, 0));
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    flow->dead = true;
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
 								    dp_netdev_flow_unref(flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
 								dp_netdev_flow_flush(struct dp_netdev *dp)
 								{
-												cmap, classifier: Avoid unsafe aliasing in iterators.

CMAP_FOR_EACH and CLS_FOR_EACH and their variants tried to use void ** as
a "pointer to any kind of pointer".  That is a violation of the aliasing
rules in ISO C which technically yields undefined behavior.  With GCC 4.1,
it causes both warnings and actual misbehavior.  One option would be to add
-fno-strict-aliasing to the compiler flags, but that would only help with
GCC; who knows whether this can be worked around with other compilers.

Instead, this commit rewrites the iterators to avoid disallowed pointer
aliasing.

VMware-BZ: #1287651
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-21 21:00:04 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												cmap: Merge CMAP_FOR_EACH_SAFE into CMAP_FOR_EACH.

There isn't any significant downside to making cmap iteration "safe" all
the time, so this drops the _SAFE variant.

Similar changes to CMAP_CURSOR_FOR_EACH and CMAP_CURSOR_FOR_EACH_CONTINUE.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-07-29 09:02:23 -07:00
+								    CMAP_FOR_EACH (netdev_flow, node, &dp->flow_table) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        dp_netdev_remove_flow(dp, netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
 								dpif_netdev_flow_flush(struct dpif *dpif)
 								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dp_netdev_flow_flush(dp);
 								    return 0;
 								}
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								struct dp_netdev_port_state {
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    struct cmap_position position;
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    char *name;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								};
 								/* Begins a port dump: allocates a 'struct dp_netdev_port_state' with
 								 * xzalloc() (presumably zero-filled -- dpif_netdev_port_dump_next()
 								 * calls free() on 'name' before first assigning it) and stores it in
 								 * '*statep'.  'dpif' is unused.  Always returns 0. */
 								static int
 								dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
 								{
 								    struct dp_netdev_port_state *state = xzalloc(sizeof *state);
 								    *statep = state;
 								    return 0;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                           struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    struct dp_netdev_port_state *state = state_;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    struct cmap_node *node;
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    int retval;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    node = cmap_next_position(&dp->ports, &state->position);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    if (node) {
 								        struct dp_netdev_port *port;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        port = CONTAINER_OF(node, struct dp_netdev_port, node);
 								        free(state->name);
 								        state->name = xstrdup(netdev_get_name(port->netdev));
 								        dpif_port->name = state->name;
 								        dpif_port->type = port->type;
 								        dpif_port->port_no = port->port_no;
 								        retval = 0;
 								    } else {
 								        retval = EOF;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    return retval;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								}
 								static int
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								{
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    struct dp_netdev_port_state *state = state_;
 								    free(state->name);
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    free(state);
 								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												Rename UNUSED macro to OVS_UNUSED to avoid naming conflict.

Requested by Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-11 10:59:47 -08:00
+								dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    uint64_t new_port_seq;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    new_port_seq = seq_read(dpif->dp->port_seq);
 								    if (dpif->last_port_seq != new_port_seq) {
 								        dpif->last_port_seq = new_port_seq;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOBUFS;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = EAGAIN;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
 								dpif_netdev_port_poll_wait(const struct dpif *dpif_)
 								{
 								    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								}
 								static struct dp_netdev_flow *
 								dp_netdev_flow_cast(const struct cls_rule *cr)
 								{
 								    return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								static bool
 								dp_netdev_flow_ref(struct dp_netdev_flow *flow)
 								{
 								    /* Tries to take a reference on 'flow' through its 'ref_cnt' and hands
 								     * the result of ovs_refcount_try_ref_rcu() straight back to the
 								     * caller, which must check it before keeping a pointer to 'flow'. */
 								    bool acquired = ovs_refcount_try_ref_rcu(&flow->ref_cnt);
 								    return acquired;
 								}
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								/* netdev_flow_key utilities.
 								 *
 								 * netdev_flow_key is basically a miniflow.  We use these functions
 								 * (netdev_flow_key_clone, netdev_flow_key_equal, ...) instead of the miniflow
 								 * functions (miniflow_clone_inline, miniflow_equal, ...), because:
 								 *
 								 * - Since we are dealing exclusively with miniflows created by
 								 *   miniflow_extract(), if the map is different the miniflow is different.
 								 *   Therefore we can be faster by comparing the map and the miniflow in a
 								 *   single memcmp().
 								 * - netdev_flow_key's miniflow always has inline values.
 								 * - These functions can be inlined by the compiler.
 								 *
 								 * The following assertions make sure that what we're doing with miniflow is
 								 * safe.
 								 */
 								/* Build-time check: the part of struct miniflow that precedes
 								 * 'inline_values' is exactly sizeof(uint64_t) bytes. */
 								BUILD_ASSERT_DECL(offsetof(struct miniflow, inline_values)
 								                  == sizeof(uint64_t));
 								/* Build-time check: 'flow' sits at offset 0 of struct netdev_flow_key.  The
 								 * pointer cast in miniflow_to_netdev_flow_key() depends on this. */
 								BUILD_ASSERT_DECL(offsetof(struct netdev_flow_key, flow) == 0);
 								/* Returns 'mf' viewed as a netdev_flow_key.  The const qualifier is dropped
 								 * with CONST_CAST and the pointer is then cast to the key type; no data is
 								 * copied.  This relies on 'flow' being at offset 0 of struct
 								 * netdev_flow_key, which is asserted at build time. */
 								static inline struct netdev_flow_key *
 								miniflow_to_netdev_flow_key(const struct miniflow *mf)
 								{
 								    struct miniflow *writable_mf = CONST_CAST(struct miniflow *, mf);
 								    return (struct netdev_flow_key *) writable_mf;
 								}
 								/* Returns the size of a netdev_flow_key whose miniflow map has 'flow_u32s'
 								 * bits set: the bytes of struct miniflow that precede 'inline_values', plus
 								 * MINIFLOW_VALUES_SIZE(flow_u32s) for the values themselves. */
 								static inline uint32_t
 								netdev_flow_key_size(uint32_t flow_u32s)
 								{
 								    const size_t header_bytes = offsetof(struct miniflow, inline_values);
 								    const size_t values_bytes = MINIFLOW_VALUES_SIZE(flow_u32s);
 								    return values_bytes + header_bytes;
 								}
 								/* Used to compare 'netdev_flow_key's (miniflows) in the exact match cache. */
 								static inline bool
 								netdev_flow_key_equal(const struct netdev_flow_key *a,
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								                      const struct netdev_flow_key *b,
 								                      uint32_t size)
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								{
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								    return !memcmp(a, b, size);
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								}
 								static inline void
 								netdev_flow_key_clone(struct netdev_flow_key *dst,
 								                      const struct netdev_flow_key *src,
 								                      uint32_t size)
 								{
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								    memcpy(dst, src, size);
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								}
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								static inline bool
 								emc_entry_alive(struct emc_entry *ce)
 								{
 								    /* An entry counts as alive only when it points at a flow and that
 								     * flow's 'dead' flag is not set. */
 								    if (!ce->flow) {
 								        return false;
 								    }
 								    return !ce->flow->dead;
 								}
 								/* Empties cache entry 'ce'.  If the entry points at a flow, that flow is
 								 * passed to dp_netdev_flow_unref() and the entry's pointer is reset to
 								 * NULL.  An entry that is already empty is left untouched. */
 								static void
 								emc_clear_entry(struct emc_entry *ce)
 								{
 								    struct dp_netdev_flow *cached = ce->flow;
 								    if (!cached) {
 								        return;
 								    }
 								    dp_netdev_flow_unref(cached);
 								    ce->flow = NULL;
 								}
 								static inline void
 								emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								                 const struct netdev_flow_key *mf, uint32_t hash)
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								{
 								    if (ce->flow != flow) {
 								        if (ce->flow) {
 								            dp_netdev_flow_unref(ce->flow);
 								        }
 								        if (dp_netdev_flow_ref(flow)) {
 								            ce->flow = flow;
 								        } else {
 								            ce->flow = NULL;
 								        }
 								    }
 								    if (mf) {
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								        uint32_t mf_len = netdev_flow_key_size(count_1bits(mf->flow.map));
 								        netdev_flow_key_clone(&ce->mf, mf, mf_len);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        ce->hash = hash;
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								        ce->mf_len = mf_len;
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    }
 								}
 								static inline void
 								emc_insert(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash,
 								           struct dp_netdev_flow *flow)
 								{
 								    struct emc_entry *to_be_replaced = NULL;
 								    struct emc_entry *current_entry;
 								    EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) {
 								        if (current_entry->hash == hash
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								            && netdev_flow_key_equal(&current_entry->mf,
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								                                     miniflow_to_netdev_flow_key(mf),
 								                                     current_entry->mf_len)) {
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
 								            /* We found the entry with the 'mf' miniflow */
 								            emc_change_entry(current_entry, flow, NULL, 0);
 								            return;
 								        }
 								        /* Replacement policy: put the flow in an empty (not alive) entry, or
 								         * in the first entry where it can be */
 								        if (!to_be_replaced
 								            || (emc_entry_alive(to_be_replaced)
 								                && !emc_entry_alive(current_entry))
 								            || current_entry->hash < to_be_replaced->hash) {
 								            to_be_replaced = current_entry;
 								        }
 								    }
 								    /* We didn't find the miniflow in the cache.
 								     * The 'to_be_replaced' entry is where the new flow will be stored */
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								    emc_change_entry(to_be_replaced, flow, miniflow_to_netdev_flow_key(mf),
 								                     hash);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								}
 								static inline struct dp_netdev_flow *
 								emc_lookup(struct emc_cache *cache, const struct miniflow *mf, uint32_t hash)
 								{
 								    struct emc_entry *current_entry;
 								    EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, hash) {
 								        if (current_entry->hash == hash && emc_entry_alive(current_entry)
-												dpif-netdev: Introduce netdev_flow_key_* functions

netdev_flow_key is a miniflow with the following constraints:

1) It is used only inside dpif-netdev.c.
2) It always has inline values.
3) It contains only miniflows created by miniflow_extract().

Therefore, by using these new functions instead of the miniflow_*
ones, we get the following (performance related) benefits:

- Because of (1) the functions can be inlined.
- Because of (2) and (3) the netdev_flow_key can be treated as POD.
  Specifically, because of (3), we can do comparisons with memcmp,
  since if the map is different the miniflow must be different.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:42 +00:00
+								            && netdev_flow_key_equal(&current_entry->mf,
-												dpif-netdev: Store miniflow length in exact match cache

This optimization is done to avoid calling count_1bits(), which, if
the popcnt instruction is not available, might be slow. popcnt may not
be available because:

- We are running on old hardware
- (more likely) We're using a generic build (i.e. packaged OVS from a
  distro), not tuned for the specific CPU

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
											
										
										
											2014-09-06 08:10:43 +00:00
+								                                     miniflow_to_netdev_flow_key(mf),
 								                                     current_entry->mf_len)) {
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
 								            /* We found the entry with the 'mf' miniflow */
 								            return current_entry->flow;
 								        }
 								    }
 								    return NULL;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static struct dp_netdev_flow *
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key)
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								{
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								    struct cls_rule *rule;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												classifier: Add a batched miniflow lookup function.

Used in a future patch.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:40:47 -07:00
+								    classifier_lookup_miniflow_batch(&dp->cls, &key, &rule, 1);
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								    netdev_flow = dp_netdev_flow_cast(rule);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    return netdev_flow;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								}
 								static struct dp_netdev_flow *
 								dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								                             &dp->flow_table) {
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        if (flow_equal(&netdev_flow->flow, flow)) {
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            return netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return NULL;
 								}
 								static void
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow,
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								                    struct dpif_flow_stats *stats)
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								{
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    memset(stats, 0, sizeof *stats);
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        stats->n_packets += bucket->packet_count;
 								        stats->n_bytes += bucket->byte_count;
 								        stats->used = MAX(stats->used, bucket->used);
 								        stats->tcp_flags |= bucket->tcp_flags;
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								/* Populates 'flow' with the mask, actions and statistics of 'netdev_flow'.
 								 *
 								 * The mask is serialized into 'buffer', and 'flow->mask' points at that
 								 * buffer's data rather than at a private copy.  'flow->actions' likewise
 								 * points at the actions currently attached to 'netdev_flow'; they are not
 								 * copied.  No other members of 'flow' are written here. */
 								static void
 								dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
 								                            struct ofpbuf *buffer, struct dpif_flow *flow)
 								{
 								    struct dp_netdev_actions *flow_actions;
 								    struct flow_wildcards wildcards;
 								    uint32_t in_port_mask;
 								    /* Mask: expand the classifier rule's minimask to a full set of
 								     * wildcards, then encode it into 'buffer'. */
 								    minimask_expand(&netdev_flow->cr.match.mask, &wildcards);
 								    in_port_mask = odp_to_u32(wildcards.masks.in_port.odp_port);
 								    odp_flow_key_from_mask(buffer, &wildcards.masks, &netdev_flow->flow,
 								                           in_port_mask, SIZE_MAX, true);
 								    flow->mask = ofpbuf_data(buffer);
 								    flow->mask_len = ofpbuf_size(buffer);
 								    /* Actions: hand out a pointer and length, not a copy. */
 								    flow_actions = dp_netdev_flow_get_actions(netdev_flow);
 								    flow->actions = flow_actions->actions;
 								    flow->actions_len = flow_actions->size;
 								    /* Statistics. */
 								    get_dpif_flow_stats(netdev_flow, &flow->stats);
 								}
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								static int
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
 								                              const struct nlattr *mask_key,
 								                              uint32_t mask_key_len, const struct flow *flow,
 								                              struct flow *mask)
 								{
 								    if (mask_key_len) {
-												dpif-netdev: Make a log message more detailed.

This would have helped me track down a bug I was hunting just now.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-02-04 08:07:45 -08:00
+								        enum odp_key_fitness fitness;
 								        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
 								        if (fitness) {
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            /* This should not happen: it indicates that
 								             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
 								             * disagree on the acceptable form of a mask.  Log the problem
 								             * as an error, with enough details to enable debugging. */
 								            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 								            if (!VLOG_DROP_ERR(&rl)) {
 								                struct ds s;
 								                ds_init(&s);
 								                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
 								                                true);
-												dpif-netdev: Make a log message more detailed.

This would have helped me track down a bug I was hunting just now.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-02-04 08:07:45 -08:00
+								                VLOG_ERR("internal error parsing flow mask %s (%s)",
 								                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                ds_destroy(&s);
 								            }
 								            return EINVAL;
 								        }
 								    } else {
 								        enum mf_field_id id;
 								        /* No mask key, unwildcard everything except fields whose
 								         * prerequisites are not met. */
 								        memset(mask, 0x0, sizeof *mask);
 								        for (id = 0; id < MFF_N_IDS; ++id) {
 								            /* Skip registers and metadata. */
 								            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
 								                && id != MFF_METADATA) {
 								                const struct mf_field *mf = mf_from_id(id);
 								                if (mf_are_prereqs_ok(mf, flow)) {
 								                    mf_mask_field(mf, mask);
 								                }
 								            }
 								        }
 								    }
-												dpif-netdev: Unwildcard entire odp_port in dpif_netdev_mask_from_nlattrs().

One case in the dpif_netdev_mask_from_nlattrs() function accidentally
wildcarded only a 16-bit subset of the mask's odp_port.  On little-endian
machines this subset was the lower bits, which happened to work out OK,
but on big-endian machines this subset was the upper bits, which doesn't
work and causes a test failure.  (The problem was actually visible in the
test expected results on little-endian machines, but we had not noticed.)

This commit unwildcards the whole field, fixing the problem, and updates
the test expected results to match.

This fixes the failure of test 732 seen here:
https://buildd.debian.org/status/fetch.php?pkg=openvswitch&arch=sparc&ver=2.1.0%2Bgit20140325-1&stamp=1396438624

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-04-05 10:27:05 -07:00
+								    /* Force unwildcard the in_port.
 								     *
 								     * We need to do this even in the case where we unwildcard "everything"
 								     * above because "everything" only includes the 16-bit OpenFlow port number
 								     * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
 								     * port number mask->in_port.odp_port. */
 								    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    return 0;
 								}
 								static int
 								dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
 								                              struct flow *flow)
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								{
-												dpif-netdev: Make "packet-out" with in_port=OFPP_CONTROLLER work again.

Commit 4e022ec09e14 (Create specific types for ofp and odp port) broke
OpenFlow OFPP_PACKET_OUT requests that use in_port=OFPP_CONTROLLER.  This
commit fixes the problem and adds a regression test.

CC: Alex Wang <alexw@nicira.com>
Reported-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-09 09:23:02 -07:00
+								    odp_port_t in_port;
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    if (odp_flow_key_to_flow(key, key_len, flow)) {
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								        /* This should not happen: it indicates that odp_flow_key_from_flow()
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								         * and odp_flow_key_to_flow() disagree on the acceptable form of a
 								         * flow.  Log the problem as an error, with enough details to enable
 								         * debugging. */
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 								        if (!VLOG_DROP_ERR(&rl)) {
 								            struct ds s;
 								            ds_init(&s);
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
 								            ds_destroy(&s);
 								        }
 								        return EINVAL;
 								    }
-												dpif-netdev: Make "packet-out" with in_port=OFPP_CONTROLLER work again.

Commit 4e022ec09e14 (Create specific types for ofp and odp port) broke
OpenFlow OFPP_PACKET_OUT requests that use in_port=OFPP_CONTROLLER.  This
commit fixes the problem and adds a regression test.

CC: Alex Wang <alexw@nicira.com>
Reported-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-09 09:23:02 -07:00
+								    in_port = flow->in_port.odp_port;
 								    if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
-												datapath: Allow a packet with no input port to omit OVS_KEY_ATTR_IN_PORT.

When ovs-vswitchd executes actions on a synthesized packet, that is, on a
packet that is not being forwarded from any particular port but is being
generated by ovs-vswitchd itself or by an OpenFlow controller (using a
OFPT_PACKET_OUT message with an in_port of OFPP_NONE), there is no good
choice for the in_port to pass to the kernel in the flow in the
OVS_PACKET_CMD_EXECUTE message.  This commit allows ovs-vswitchd to omit
the in_port entirely in this case.

This fixes a bug in OFPT_PACKET_OUT: using an in_port of OFPP_NONE would
cause the packet to be dropped by the kernel, since that's an invalid
input port.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Reported-by: Aaron Rosen <arosen@clemson.edu>

											
										
										
											2011-09-08 16:30:20 -07:00
+								        return EINVAL;
 								    }
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    return 0;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
+								    struct flow key;
 								    int error;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								    error = dpif_netdev_flow_from_nlattrs(get->key, get->key_len, &key);
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
+								    if (error) {
 								        return error;
 								    }
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    netdev_flow = dp_netdev_find_flow(dp, &key);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (netdev_flow) {
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								        dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->flow);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								     } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Builds a new 'struct dp_netdev_flow' from 'match' and the 'actions_len'
 								 * bytes of 'actions', then installs it in 'dp'.
 								 *
 								 * The new flow is allocated with xzalloc(), gets a copy of 'match->flow',
 								 * has its reference count, stats and actions initialized, and has its
 								 * classifier rule initialized from 'match' at NETDEV_RULE_PRIORITY.  It is
 								 * then inserted into 'dp->flow_table' (hashed with
 								 * flow_hash(&match->flow, 0)) and into 'dp->cls'.  When debug logging is
 								 * enabled, the match and actions are logged, rate-limited by 'upcall_rl'.
 								 *
 								 * Annotated as requiring 'dp->flow_mutex'.  Always returns 0. */
 								static int
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								dp_netdev_flow_add(struct dp_netdev *dp, struct match *match,
 								                   const struct nlattr *actions, size_t actions_len)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp->flow_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    netdev_flow = xzalloc(sizeof *netdev_flow);
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
 								    /* NOTE(review): the CONST_CAST()s below presumably exist because
 								     * 'flow', 'cr' and 'node' are declared const in the struct --
 								     * confirm against the struct definition. */
+								    *CONST_CAST(struct flow *, &netdev_flow->flow) = match->flow;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Reintroduce ref_cnt for dp_netdev_flow

struct dp_netdev_flow used to have a reference counter.
It has been replaced by RCU. Unfortunately RCU is not
enough if we plan to hold a reference to the dp_netdev_flow
for a long time. So this commit reintroduces reference
counting for struct dp_netdev_flow

Subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-11 17:25:50 -07:00
+								    ovs_refcount_init(&netdev_flow->ref_cnt);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    ovsthread_stats_init(&netdev_flow->stats);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    ovsrcu_set(&netdev_flow->actions,
 								               dp_netdev_actions_create(actions, actions_len));
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								                  match, NETDEV_RULE_PRIORITY);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    cmap_insert(&dp->flow_table,
 								                CONST_CAST(struct cmap_node *, &netdev_flow->node),
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								                flow_hash(&match->flow, 0));
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    classifier_insert(&dp->cls,
 								                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
 								    /* The log string is only built when debug logging is enabled. */
+								    if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
 								        struct ds ds = DS_EMPTY_INITIALIZER;
 								        ds_put_cstr(&ds, "flow_add: ");
 								        match_format(match, &ds, OFP_DEFAULT_PRIORITY);
 								        ds_put_cstr(&ds, ", actions:");
 								        format_odp_actions(&ds, actions, actions_len);
 								        VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));
 								        ds_destroy(&ds);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								/* Resets the statistics of 'netdev_flow': for every stats bucket in
 								 * 'netdev_flow->stats', sets 'used', 'packet_count', 'byte_count' and
 								 * 'tcp_flags' to zero while holding that bucket's mutex. */
 								static void
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								clear_stats(struct dp_netdev_flow *netdev_flow)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        bucket->used = 0;
 								        bucket->packet_count = 0;
 								        bucket->byte_count = 0;
 								        bucket->tcp_flags = 0;
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Implements the "flow put" operation described by 'put' on 'dpif'.
 								 *
 								 * The key and mask attributes in 'put' are parsed into a 'struct match';
 								 * a parse failure is returned immediately.  With 'dp->flow_mutex' held,
 								 * the flow is then looked up by the miniflow built from the parsed key:
 								 *
 								 *   - No flow found, DPIF_FP_CREATE set: if the flow table holds fewer
 								 *     than MAX_FLOWS entries, 'put->stats' (if nonnull) is zeroed and
 								 *     the flow is added; otherwise the result is EFBIG.
 								 *
 								 *   - No flow found, DPIF_FP_CREATE clear: ENOENT.
 								 *
 								 *   - Flow found, DPIF_FP_MODIFY set and the found flow's key equals the
 								 *     parsed key: its actions are replaced, its stats are copied to
 								 *     'put->stats' (if nonnull) and then cleared if DPIF_FP_ZERO_STATS
 								 *     is set.
 								 *
 								 *   - Flow found otherwise: EEXIST if DPIF_FP_CREATE is set, else EINVAL
 								 *     (an overlapping flow).
 								 *
 								 * Returns 0 on success, otherwise one of the errno values above. */
 								static int
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								    struct miniflow miniflow;
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								    struct match match;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    int error;
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    if (error) {
 								        return error;
 								    }
 								    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
 								                                          put->mask, put->mask_len,
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								                                          &match.flow, &match.wc.masks);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    if (error) {
 								        return error;
 								    }
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
 								    /* Initialized only after both early returns above; paired with the
 								     * miniflow_destroy() just before the final return. */
+								    miniflow_init(&miniflow, &match.flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
+								    netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (!netdev_flow) {
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								        if (put->flags & DPIF_FP_CREATE) {
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								            if (cmap_count(&dp->flow_table) < MAX_FLOWS) {
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								                if (put->stats) {
 								                    memset(put->stats, 0, sizeof *put->stats);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                }
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								                error = dp_netdev_flow_add(dp, &match, put->actions,
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								                                           put->actions_len);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								                error = EFBIG;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            }
 								        } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								            error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    } else {
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        if (put->flags & DPIF_FP_MODIFY
-												dpif-netdev: Avoid useless flow copy in dp_netdev_flow_add().

This patch gives dp_netdev_flow_add() a match with which it can
initialize the classifier rule.  This prevents it from needing to copy
a flow and flow_wildcards into the match first.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-27 18:56:45 -07:00
+								            && flow_equal(&match.flow, &netdev_flow->flow)) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								            struct dp_netdev_actions *new_actions;
 								            struct dp_netdev_actions *old_actions;
 								            new_actions = dp_netdev_actions_create(put->actions,
 								                                                   put->actions_len);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
 								            /* Remember the current actions before publishing the new
 								             * ones, so they can be released below. */
+								            old_actions = dp_netdev_flow_get_actions(netdev_flow);
 								            ovsrcu_set(&netdev_flow->actions, new_actions);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
 								            /* Stats are reported before they are (optionally) zeroed. */
+								            if (put->stats) {
 								                get_dpif_flow_stats(netdev_flow, put->stats);
 								            }
 								            if (put->flags & DPIF_FP_ZERO_STATS) {
 								                clear_stats(netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
 								            /* NOTE(review): the free is deferred through ovsrcu_postpone(),
 								             * presumably so that concurrent readers still using the old
 								             * actions are not affected -- confirm against ovs-rcu.h. */
+								            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        } else if (put->flags & DPIF_FP_CREATE) {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								            error = EEXIST;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        } else {
 								            /* Overlapping flow. */
 								            error = EINVAL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												dpif-netdev: Fix memory leak in dpif_netdev_flow_put()

miniflow_destroy() needs to be called after using miniflow_init().
Otherwise, if the miniflow mallocs data, then a memory leak may
occur.

Found by inspection.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 13:05:17 -07:00
+								    miniflow_destroy(&miniflow);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
+								    struct flow key;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    int error;
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    if (error) {
 								        return error;
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    netdev_flow = dp_netdev_find_flow(dp, &key);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (netdev_flow) {
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								        if (del->stats) {
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								            get_dpif_flow_stats(netdev_flow, del->stats);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        dp_netdev_remove_flow(dp, netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								struct dpif_netdev_flow_dump {
 								    struct dpif_flow_dump up;
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    struct cmap_position pos;
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    int status;
 								    struct ovs_mutex mutex;
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								};
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								static struct dpif_netdev_flow_dump *
 								dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								}
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								static struct dpif_flow_dump *
 								dpif_netdev_flow_dump_create(const struct dpif *dpif_)
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								{
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dpif_netdev_flow_dump *dump;
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    dump = xmalloc(sizeof *dump);
 								    dpif_flow_dump_init(&dump->up, dpif_);
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								    memset(&dump->pos, 0, sizeof dump->pos);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    dump->status = 0;
 								    ovs_mutex_init(&dump->mutex);
 								    return &dump->up;
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								}
 								static int
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								{
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    ovs_mutex_destroy(&dump->mutex);
 								    free(dump);
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    return 0;
 								}
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								struct dpif_netdev_flow_dump_thread {
 								    struct dpif_flow_dump_thread up;
 								    struct dpif_netdev_flow_dump *dump;
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
 								    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								};
 								/* Maps 'thread', the generic 'up' member, back to the
 								 * struct dpif_netdev_flow_dump_thread that contains it. */
 								static struct dpif_netdev_flow_dump_thread *
 								dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
 								{
 								    struct dpif_netdev_flow_dump_thread *netdev_thread;
 								    netdev_thread = CONTAINER_OF(thread,
 								                                 struct dpif_netdev_flow_dump_thread, up);
 								    return netdev_thread;
 								}
 								/* Allocates the per-thread state for the flow dump 'dump_' and returns a
 								 * pointer to its generic 'up' member.
 								 *
 								 * Only the 'up' and 'dump' members are set here; the 'keybuf' and
 								 * 'maskbuf' arrays are not initialized by this function.  The returned
 								 * object is released with dpif_netdev_flow_dump_thread_destroy(). */
 								static struct dpif_flow_dump_thread *
 								dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
 								{
 								    struct dpif_netdev_flow_dump_thread *state;
 								    struct dpif_netdev_flow_dump *netdev_dump;
 								    netdev_dump = dpif_netdev_flow_dump_cast(dump_);
 								    state = xmalloc(sizeof *state);
 								    dpif_flow_dump_thread_init(&state->up, &netdev_dump->up);
 								    /* Remember the shared dump this thread state belongs to. */
 								    state->dump = netdev_dump;
 								    return &state->up;
 								}
 								/* Frees the per-thread dump state that contains 'thread_'.  The shared
 								 * dump object it points to is not touched here. */
 								static void
 								dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
 								{
 								    free(dpif_netdev_flow_dump_thread_cast(thread_));
 								}
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								static int
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								                           struct dpif_flow *flows, int max_flows)
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								{
 								    struct dpif_netdev_flow_dump_thread *thread
 								        = dpif_netdev_flow_dump_thread_cast(thread_);
 								    struct dpif_netdev_flow_dump *dump = thread->dump;
 								    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    int n_flows = 0;
 								    int i;
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    ovs_mutex_lock(&dump->mutex);
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    if (!dump->status) {
 								        for (n_flows = 0; n_flows < MIN(max_flows, FLOW_DUMP_MAX_BATCH);
 								             n_flows++) {
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								            struct cmap_node *node;
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
-												dpif-netdev: Use cmap instead of hmap.

This requires less locking and makes introducing lockless classifier
lookups possible.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-07-04 06:38:47 -07:00
+								            node = cmap_next_position(&dp->flow_table, &dump->pos);
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								            if (!node) {
 								                dump->status = EOF;
 								                break;
 								            }
 								            netdev_flows[n_flows] = CONTAINER_OF(node, struct dp_netdev_flow,
 								                                                 node);
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								        }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    }
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    ovs_mutex_unlock(&dump->mutex);
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    for (i = 0; i < n_flows; i++) {
 								        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
 								        struct odputil_keybuf *keybuf = &thread->keybuf[i];
 								        struct dp_netdev_flow *netdev_flow = netdev_flows[i];
 								        struct dpif_flow *f = &flows[i];
 								        struct dp_netdev_actions *dp_actions;
 								        struct flow_wildcards wc;
 								        struct ofpbuf buf;
 								        minimask_expand(&netdev_flow->cr.match.mask, &wc);
 								        /* Key. */
 								        ofpbuf_use_stack(&buf, keybuf, sizeof *keybuf);
 								        odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
 								                               netdev_flow->flow.in_port.odp_port, true);
 								        f->key = ofpbuf_data(&buf);
 								        f->key_len = ofpbuf_size(&buf);
 								        /* Mask. */
 								        ofpbuf_use_stack(&buf, maskbuf, sizeof *maskbuf);
 								        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
 								                               odp_to_u32(wc.masks.in_port.odp_port),
 								                               SIZE_MAX, true);
 								        f->mask = ofpbuf_data(&buf);
 								        f->mask_len = ofpbuf_size(&buf);
 								        /* Actions. */
 								        dp_actions = dp_netdev_flow_get_actions(netdev_flow);
 								        f->actions = dp_actions->actions;
 								        f->actions_len = dp_actions->size;
 								        /* Stats. */
 								        get_dpif_flow_stats(netdev_flow, &f->stats);
 								    }
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
-												dpif-netdev: Implement batched flow dumping.

Previously, flows were retrieved one by one when dumping flows for
datapaths of type 'netdev'. This increased contention for the dump's
mutex, negatively affecting revalidator performance.

This patch retrieves batches of flows when dumping flows for datapaths
of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
[blp@nicira.com relaxed max_flows restriction]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-23 12:36:11 -07:00
+								    return n_flows;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *pmd;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    struct dpif_packet packet, *pp;
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    struct pkt_metadata *md = &execute->md;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												ofpbuf: Introduce access api for base, data and size.

These functions will be used by later patches.  Following patch
does not change functionality.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-30 01:31:50 -07:00
+								    if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
 								        ofpbuf_size(execute->packet) > UINT16_MAX) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return EINVAL;
 								    }
-												dpif-netdev: use dpif_packet structure for packets

This commit introduces a new data structure used for receiving packets from
netdevs and passing them to dpifs.
The purpose of this change is to allow storing some private data for each
packet. The subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:57 -07:00
+								    packet.ofpbuf = *execute->packet;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    pp = &packet;
-												dpif-netdev: use dpif_packet structure for packets

This commit introduces a new data structure used for receiving packets from
netdevs and passing them to dpifs.
The purpose of this change is to allow storing some private data for each
packet. The subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:57 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    /* Tries finding the 'pmd'.  If NULL is returned, that means
 								     * the current thread is a non-pmd thread and should use
 								     * dp_netdev_get_nonpmd(). */
 								    pmd = ovsthread_getspecific(dp->per_pmd_key);
 								    if (!pmd) {
 								        pmd = dp_netdev_get_nonpmd(dp);
 								    }
 								    /* If the current thread is non-pmd thread, acquires
 								     * the 'non_pmd_mutex'. */
 								    if (pmd->core_id == NON_PMD_CORE_ID) {
 								        ovs_mutex_lock(&dp->non_pmd_mutex);
 								    }
 								    dp_netdev_execute_actions(pmd, &pp, 1, false, md, execute->actions,
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                              execute->actions_len);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    if (pmd->core_id == NON_PMD_CORE_ID) {
 								        ovs_mutex_unlock(&dp->non_pmd_mutex);
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: use dpif_packet structure for packets

This commit introduces a new data structure used for receiving packets from
netdevs and passing them to dpifs.
The purpose of this change is to allow storing some private data for each
packet. The subsequent commits make use of it.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:57 -07:00
+								    /* Even though may_steal is set to false, some actions could modify or
 								     * reallocate the ofpbuf memory.  We need to pass those changes back to
 								     * the caller. */
 								    *execute->packet = packet.ofpbuf;
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								static void
 								dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
 								{
 								    size_t i;
 								    for (i = 0; i < n_ops; i++) {
 								        struct dpif_op *op = ops[i];
 								        switch (op->type) {
 								        case DPIF_OP_FLOW_PUT:
 								            op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put);
 								            break;
 								        case DPIF_OP_FLOW_DEL:
 								            op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del);
 								            break;
 								        case DPIF_OP_EXECUTE:
 								            op->error = dpif_netdev_execute(dpif, &op->u.execute);
 								            break;
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
 								        case DPIF_OP_FLOW_GET:
 								            op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get);
 								            break;
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								        }
 								    }
 								}
-												dpif-netdev: Allow enqueue actions.

The dpif-netdev implementation disallowed enqueue actions because
it did not support conversion from OVS 'queue_id' to dpif
'priority'.  For testing purposes, this patch allows queues which
translate into NOOPs.

											
										
										
											2011-11-21 13:36:17 -08:00
+								/* Translates 'queue_id' into a priority value, storing it in '*priority'.
 								 *
 								 * No mapping is applied: '*priority' is simply set to 'queue_id'.  'dpif'
 								 * is unused.  Always returns 0. */
 								static int
 								dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
 								                              uint32_t queue_id, uint32_t *priority)
 								{
 								    *priority = queue_id;
 								    return 0;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								/* Creates and returns a new 'struct dp_netdev_actions' whose 'actions' member
 								 * is a copy of the 'size' bytes of 'actions' and whose 'size' member is
 								 * 'size'.
 								 *
 								 * Both the structure and the copied actions are newly allocated here; the
 								 * caller's 'actions' buffer is not retained.  No reference count is
 								 * initialized by this function. */
 								struct dp_netdev_actions *
 								dp_netdev_actions_create(const struct nlattr *actions, size_t size)
 								{
 								    struct dp_netdev_actions *netdev_actions;
 								    netdev_actions = xmalloc(sizeof *netdev_actions);
 								    /* Duplicate the caller's actions so the new object owns its own copy. */
 								    netdev_actions->actions = xmemdup(actions, size);
 								    netdev_actions->size = size;
 								    return netdev_actions;
 								}
 								struct dp_netdev_actions *
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								{
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								}
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								static void
 								dp_netdev_actions_free(struct dp_netdev_actions *actions)
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								{
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    free(actions->actions);
 								    free(actions);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												sparse: workaround for a bug in sparse.

sparse emits the following warning:
lib/dpif-netdev.c:1755:15: warning: Initializer entry defined twice
lib/dpif-netdev.c:1755:15:   also defined here
due to a bug in sparse, which doesn't like inline functions that
expand a #define within them.  This commit removes 'inline' to make
sparse happy.

Signed-off-by: Pritesh Kothari <pritesh.kothari@cisco.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-28 12:20:00 -07:00
+								static void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                           struct dp_netdev_port *port,
 								                           struct netdev_rxq *rxq)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								{
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
 								    int error, cnt;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    error = netdev_rxq_recv(rxq, packets, &cnt);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    if (!error) {
-												lib/dpif-netdev: Make emc_mutex recursive.

dpif_netdev_execute may be called while doing upcall processing.
Since the context of the input port is not tracked upto this point, we
use the shared dp->emc_cache for packet execution, where the emc_cache
is needed for recirculation.

While recursive mutexes can make thread safety analysis hard, for now
we change emc_mutex to be recursive.  Forthcoming new unit tests will
fail with the current non-recursive mutex.  Later improvements may
remove the need for this recursion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Daniele Di Proietto <ddiproietto@vmware.com>
											
										
										
											2014-09-08 15:33:00 -07:00
+								        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
 								        *recirc_depth_get() = 0;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        dp_netdev_input(pmd, packets, cnt, &md);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    } else if (error != EAGAIN && error != EOPNOTSUPP) {
-												lib/dpif-netdev: Make emc_mutex recursive.

dpif_netdev_execute may be called while doing upcall processing.
Since the context of the input port is not tracked upto this point, we
use the shared dp->emc_cache for packet execution, where the emc_cache
is needed for recirculation.

While recursive mutexes can make thread safety analysis hard, for now
we change emc_mutex to be recursive.  Forthcoming new unit tests will
fail with the current non-recursive mutex.  Later improvements may
remove the need for this recursion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Daniele Di Proietto <ddiproietto@vmware.com>
											
										
										
											2014-09-08 15:33:00 -07:00
+								        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
-												lib/dpif-netdev: Make emc_mutex recursive.

dpif_netdev_execute may be called while doing upcall processing.
Since the context of the input port is not tracked upto this point, we
use the shared dp->emc_cache for packet execution, where the emc_cache
is needed for recirculation.

While recursive mutexes can make thread safety analysis hard, for now
we change emc_mutex to be recursive.  Forthcoming new unit tests will
fail with the current non-recursive mutex.  Later improvements may
remove the need for this recursion.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Daniele Di Proietto <ddiproietto@vmware.com>
											
										
										
											2014-09-08 15:33:00 -07:00
+								                    netdev_get_name(port->netdev), ovs_strerror(error));
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
 								}
 								static void
 								dpif_netdev_run(struct dpif *dpif)
 								{
 								    struct dp_netdev_port *port;
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_nonpmd(dp);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    ovs_mutex_lock(&dp->non_pmd_mutex);
-												lib/cmap: Simplify iteration with C99 loop declaration.

This further eases porting existing hmap code to use cmap instead.

The iterator variants taking an explicit cursor are retained (renamed)
as they are needed when iteration is to be continued from the last
iterated node.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-11 11:07:43 -07:00
+								    CMAP_FOR_EACH (port, node, &dp->ports) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        if (!netdev_is_pmd(port->netdev)) {
 								            int i;
 								            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								                dp_netdev_process_rxq_port(non_pmd, port, port->rxq[i]);
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								            }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								    }
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    ovs_mutex_unlock(&dp->non_pmd_mutex);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								}
 								static void
 								dpif_netdev_wait(struct dpif *dpif)
 								{
 								    struct dp_netdev_port *port;
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												lib/cmap: Simplify iteration with C99 loop declaration.

This further eases porting existing hmap code to use cmap instead.

The iterator variants taking an explicit cursor are retained (renamed)
as they are needed when iteration is to be continued from the last
iterated node.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-11 11:07:43 -07:00
+								    CMAP_FOR_EACH (port, node, &dp->ports) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        if (!netdev_is_pmd(port->netdev)) {
 								            int i;
 								            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
 								                netdev_rxq_wait(port->rxq[i]);
 								            }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp_netdev_mutex);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								}
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
/* One entry in the list of rx queues handled by pmd_load_queues(): pairs a
 * receive queue with the datapath port it belongs to. */
struct rxq_poll {
    /* Port that owns 'rx'.  pmd_load_queues() calls port_unref() on this
     * member for each entry of the list it is handed.
     * NOTE(review): so an entry presumably holds a reference on the port
     * -- confirm where the reference is taken. */
    struct dp_netdev_port *port;

    /* The receive queue itself. */
    struct netdev_rxq *rx;
};
 								static int
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								                struct rxq_poll **ppoll_list, int poll_cnt)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif_netdev_run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								{
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								    struct rxq_poll *poll_list = *ppoll_list;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif_netdev_run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    struct dp_netdev_port *port;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    int n_pmds_on_numa, index, i;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif_netdev_run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								    /* Simple scheduler for netdev rx polling. */
 								    for (i = 0; i < poll_cnt; i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        port_unref(poll_list[i].port);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif_netdev_run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
 								    poll_cnt = 0;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    index = 0;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
-												dpif-netdev: Introduce port_try_ref() to prevent a race.

When a pmd thread iterates through all ports for queue loading,
the main thread may unreference and 'rcu-free' a port before
the pmd thread takes a new reference to it.  This could cause the
pmd thread to fail to take the reference and access freed memory later.

This commit fixes this race by introducing port_try_ref(),
which uses ovs_refcount_try_ref_rcu().  The pmd thread
will only load the port's queue if port_try_ref() returns
true.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-21 15:54:07 -07:00
+								        /* Calls port_try_ref() to prevent the main thread
 								         * from deleting the port. */
 								        if (port_try_ref(port)) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            if (netdev_is_pmd(port->netdev)
 								                && netdev_get_numa_id(port->netdev) == pmd->numa_id) {
-												dpif-netdev: Introduce port_try_ref() to prevent a race.

When a pmd thread iterates through all ports for queue loading,
the main thread may unreference and 'rcu-free' a port before
the pmd thread takes a new reference to it.  This could cause the
pmd thread to fail to take the reference and access freed memory later.

This commit fixes this race by introducing port_try_ref(),
which uses ovs_refcount_try_ref_rcu().  The pmd thread
will only load the port's queue if port_try_ref() returns
true.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-21 15:54:07 -07:00
+								                int i;
 								                for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								                    if ((index % n_pmds_on_numa) == pmd->index) {
-												dpif-netdev: Introduce port_try_ref() to prevent a race.

When a pmd thread iterates through all ports for queue loading,
the main thread may unreference and 'rcu-free' a port before
the pmd thread takes a new reference to it.  This could cause the
pmd thread to fail to take the reference and access freed memory later.

This commit fixes this race by introducing port_try_ref(),
which uses ovs_refcount_try_ref_rcu().  The pmd thread
will only load the port's queue if port_try_ref() returns
true.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-21 15:54:07 -07:00
+								                        poll_list = xrealloc(poll_list,
 								                                        sizeof *poll_list * (poll_cnt + 1));
 								                        port_ref(port);
 								                        poll_list[poll_cnt].port = port;
 								                        poll_list[poll_cnt].rx = port->rxq[i];
 								                        poll_cnt++;
 								                    }
 								                    index++;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue I/O.  The following
patch adds support for this.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								                }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								            }
-												dpif-netdev: Introduce port_try_ref() to prevent a race.

When a pmd thread iterates through all ports for queue loading,
the main thread may unreference and 'rcu-free' a port before
the pmd thread takes a new reference to it.  This could cause the
pmd thread to fail to take the reference and access freed memory later.

This commit fixes this race by introducing port_try_ref(),
which uses ovs_refcount_try_ref_rcu().  The pmd thread
will only load the port's queue if port_try_ref() returns
true.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-21 15:54:07 -07:00
+								            /* Unrefs the port_try_ref(). */
 								            port_unref(port);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								    }
 								    *ppoll_list = poll_list;
 								    return poll_cnt;
 								}
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								static void *
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								pmd_thread_main(void *f_)
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								{
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *pmd = f_;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    unsigned int lc = 0;
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								    struct rxq_poll *poll_list;
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
+								    unsigned int port_seq = PMD_INITIAL_SEQ;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    int poll_cnt;
 								    int i;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    poll_cnt = 0;
 								    poll_list = NULL;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 								    ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
 								    pmd_thread_setaffinity_cpu(pmd->core_id);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								reload:
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    emc_cache_init(&pmd->flow_cache);
 								    poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    for (;;) {
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								        int i;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        for (i = 0; i < poll_cnt; i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            dp_netdev_process_rxq_port(pmd, poll_list[i].port, poll_list[i].rx);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								        if (lc++ > 1024) {
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
+								            unsigned int seq;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								            lc = 0;
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
 								            ovsrcu_quiesce();
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            atomic_read_relaxed(&pmd->change_seq, &seq);
-												lib/dpif-netdev: Clean-up pmd thread signaling.

It could be possible that the thread misses a signal when it reads the
change_seq again after reload.  Also, the counter has no dependent
data, so the memory model for the atomic read can be relaxed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
											
										
										
											2014-08-15 15:09:38 -07:00
+								            if (seq != port_seq) {
 								                port_seq = seq;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								                break;
 								            }
 								        }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD-type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    emc_cache_uninit(&pmd->flow_cache);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    if (!latch_is_set(&pmd->exit_latch)){
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        goto reload;
 								    }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    for (i = 0; i < poll_cnt; i++) {
 								         port_unref(poll_list[i].port);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    free(poll_list);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    return NULL;
 								}
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
 								/* Takes 'dp->upcall_rwlock' for writing.  The lock is still held when
 								 * this function returns (hence OVS_ACQUIRES); dp_netdev_enable_upcall()
 								 * is the matching release.  Presumably the upcall paths take the same
 								 * lock for reading, which is what makes this "disable" upcalls --
 								 * confirm against the upcall code. */
+								static void
 								dp_netdev_disable_upcall(struct dp_netdev *dp)
 								    OVS_ACQUIRES(dp->upcall_rwlock)
 								{
 								    fat_rwlock_wrlock(&dp->upcall_rwlock);
 								}
 								/* dpif-level wrapper: resolves 'dpif' to its dp_netdev and calls
 								 * dp_netdev_disable_upcall() on it.  Thread-safety analysis is turned
 								 * off because the lock acquired by that call remains held on return. */
 								static void
 								dpif_netdev_disable_upcall(struct dpif *dpif)
 								    OVS_NO_THREAD_SAFETY_ANALYSIS
 								{
 								    dp_netdev_disable_upcall(get_dp_netdev(dpif));
 								}
 								/* Releases 'dp->upcall_rwlock' (hence OVS_RELEASES).  This is the
 								 * counterpart of dp_netdev_disable_upcall(), which takes the lock for
 								 * writing and returns with it held. */
 								static void
 								dp_netdev_enable_upcall(struct dp_netdev *dp)
 								    OVS_RELEASES(dp->upcall_rwlock)
 								{
 								    fat_rwlock_unlock(&dp->upcall_rwlock);
 								}
 								/* dpif-level wrapper: resolves 'dpif' to its dp_netdev and calls
 								 * dp_netdev_enable_upcall() on it.  Thread-safety analysis is turned
 								 * off because the lock released by that call was acquired elsewhere. */
 								static void
 								dpif_netdev_enable_upcall(struct dpif *dpif)
 								    OVS_NO_THREAD_SAFETY_ANALYSIS
 								{
 								    dp_netdev_enable_upcall(get_dp_netdev(dpif));
 								}
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								/* Looks up the entry of 'dp->poll_threads' stored under NON_PMD_CORE_ID
 								 * and returns the dp_netdev_pmd_thread that contains it, i.e. the one
 								 * used for non-pmd threads.  Asserts that the entry exists. */
 								static struct dp_netdev_pmd_thread *
 								dp_netdev_get_nonpmd(struct dp_netdev *dp)
 								{
 								    struct cmap_node *found = cmap_find(&dp->poll_threads,
 								                                        hash_int(NON_PMD_CORE_ID, 0));
 								    ovs_assert(found);
 								    return CONTAINER_OF(found, struct dp_netdev_pmd_thread, node);
 								}
 								/* Configures the 'pmd' based on the input arguments. */
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								static void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
 								                        int index, int core_id, int numa_id)
 								{
 								    /* Record the owning datapath and the identity of this entry. */
 								    pmd->dp = dp;
 								    pmd->index = index;
 								    pmd->core_id = core_id;
 								    pmd->numa_id = numa_id;
 								    latch_init(&pmd->exit_latch);
 								    atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);
 								    /* Initialize the 'flow_cache' here for NON_PMD_CORE_ID, since no
 								     * actual thread is created for it that could do so itself. */
 								    if (core_id == NON_PMD_CORE_ID) {
 								        emc_cache_init(&pmd->flow_cache);
 								    }
 								    /* Publish 'pmd' in 'dp->poll_threads' under the hash of its core id;
 								     * dp_netdev_get_nonpmd() looks entries up with the same hash. */
 								    cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
 								                hash_int(core_id, 0));
 								}
 								/* Stops the pmd thread, removes it from the 'dp->poll_threads'
 								 * and destroys the struct. */
 								static void
 								dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								{
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    /* Uninit the 'flow_cache' here for NON_PMD_CORE_ID, since there is
 								     * no actual thread to uninit it. */
 								    if (pmd->core_id == NON_PMD_CORE_ID) {
 								        emc_cache_uninit(&pmd->flow_cache);
 								    } else {
 								        /* Set the exit latch before asking the thread to reload: the
 								         * pmd thread only skips its 'goto reload' when the latch is
 								         * set (presumably in pmd_thread_main() -- confirm).  Join the
 								         * thread so that it has finished before 'pmd' is freed below. */
 								        latch_set(&pmd->exit_latch);
 								        dp_netdev_reload_pmd__(pmd);
 								        ovs_numa_unpin_core(pmd->core_id);
 								        xpthread_join(pmd->thread, NULL);
 								    }
 								    /* Remove with the same hash that dp_netdev_configure_pmd() used for
 								     * the insertion. */
 								    cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
 								    latch_destroy(&pmd->exit_latch);
 								    free(pmd);
 								}
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								/* Destroys all pmd threads, i.e. calls dp_netdev_del_pmd() on every
 								 * entry of 'dp->poll_threads' (that function also handles the
 								 * NON_PMD_CORE_ID entry, which has no thread of its own). */
 								static void
 								dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
 								{
 								    struct dp_netdev_pmd_thread *pmd;
 								    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
 								        /* NOTE(review): dp_netdev_del_pmd() removes 'pmd' from
 								         * 'dp->poll_threads' and frees it while this iteration is
 								         * still in progress -- confirm CMAP_FOR_EACH tolerates that. */
 								        dp_netdev_del_pmd(pmd);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								}
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								/* Deletes all pmd threads on numa node 'numa_id'. */
 								static void
 								dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
 								{
 								    struct dp_netdev_pmd_thread *pmd;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
 								        /* Only entries configured with this 'numa_id' are deleted.
 								         * NOTE(review): dp_netdev_del_pmd() removes and frees 'pmd'
 								         * during the iteration -- confirm CMAP_FOR_EACH tolerates
 								         * that. */
 								        if (pmd->numa_id == numa_id) {
 								            dp_netdev_del_pmd(pmd);
 								        }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								}
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								/* Starts pmd threads for numa node 'numa_id', unless 'numa_id' is
 								 * invalid, the node already has pmd threads, or the node has no
 								 * unpinned core left. */
 								static void
 								dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
 								{
 								    int n_pmds;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    /* Reject an invalid numa id up front. */
 								    if (!ovs_numa_numa_id_is_valid(numa_id)) {
 								        /* The trailing space keeps the two concatenated literals from
 								         * running together as "(%d)invalid". */
 								        VLOG_ERR("Cannot create pmd threads due to numa id (%d) "
 								                 "invalid", numa_id);
 								        return;
 								    }
 								    n_pmds = get_n_pmd_threads_on_numa(dp, numa_id);
 								    /* If there are already pmd threads created for numa node
 								     * 'numa_id', do nothing.  Otherwise, create the pmd threads
 								     * for the numa node. */
 								    if (!n_pmds) {
 								        int can_have, n_unpinned, i;
 								        n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
 								        /* Without an unpinned core there is nothing to pin a new pmd
 								         * thread to. */
 								        if (!n_unpinned) {
 								            VLOG_ERR("Cannot create pmd threads due to out of unpinned "
 								                     "cores on numa node");
 								            return;
 								        }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        /* Tries creating NR_PMD_THREADS pmd threads on the numa node. */
 								        can_have = MIN(n_unpinned, NR_PMD_THREADS);
 								        for (i = 0; i < can_have; i++) {
 								            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
 								            int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
 								            /* Each thread will distribute all devices rx-queues among
 								             * themselves. */
 								            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
 								        }
 								        VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
 								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal when a packet is received
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								/* Bucket constructor passed to ovsthread_stats_bucket_get() for per-flow
 								 * statistics: obtains a 'struct dp_netdev_flow_stats' from
 								 * xzalloc_cacheline() and initializes its mutex before returning it. */
 								static void *
 								dp_netdev_flow_stats_new_cb(void)
 								{
 								    struct dp_netdev_flow_stats *stats;
 								    stats = xzalloc_cacheline(sizeof *stats);
 								    ovs_mutex_init(&stats->mutex);
 								    return stats;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								                    int cnt, int size,
 								                    uint16_t tcp_flags)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    long long int now = time_msec();
 								    struct dp_netdev_flow_stats *bucket;
 								    bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
 								                                        dp_netdev_flow_stats_new_cb);
 								    ovs_mutex_lock(&bucket->mutex);
 								    bucket->used = MAX(now, bucket->used);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    bucket->packet_count += cnt;
 								    bucket->byte_count += size;
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    bucket->tcp_flags |= tcp_flags;
 								    ovs_mutex_unlock(&bucket->mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								/* Bucket constructor passed to ovsthread_stats_bucket_get() for datapath-wide
 								 * statistics: obtains a 'struct dp_netdev_stats' from xzalloc_cacheline()
 								 * and initializes its mutex before returning it. */
 								static void *
 								dp_netdev_stats_new_cb(void)
 								{
 								    struct dp_netdev_stats *stats;
 								    stats = xzalloc_cacheline(sizeof *stats);
 								    ovs_mutex_init(&stats->mutex);
 								    return stats;
 								}
 								static void
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								{
 								    struct dp_netdev_stats *bucket;
 								    bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
 								    ovs_mutex_lock(&bucket->mutex);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    bucket->n[type] += cnt;
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    ovs_mutex_unlock(&bucket->mutex);
 								}
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								/* Hands 'packet_' and its 'flow' to the upcall callback registered in 'dp'.
 								 *
 								 * A DPIF_UC_MISS upcall is counted under DP_STAT_MISS first, so the miss is
 								 * recorded even when no callback is installed.  Returns ENODEV if
 								 * 'dp->upcall_cb' is null; otherwise returns the callback's return value.
 								 * When debug logging passes the 'upcall_rl' rate limit, the flow key and the
 								 * packet are formatted and logged before the callback runs. */
 								static int
 								dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
 								                 struct flow *flow, struct flow_wildcards *wc,
 								                 enum dpif_upcall_type type, const struct nlattr *userdata,
 								                 struct ofpbuf *actions, struct ofpbuf *put_actions)
 								{
 								    struct ofpbuf *buf = &packet_->ofpbuf;
 								    if (type == DPIF_UC_MISS) {
 								        dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
 								    }
 								    if (OVS_UNLIKELY(!dp->upcall_cb)) {
 								        return ENODEV;
 								    }
 								    if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
 								        struct ds flow_str = DS_EMPTY_INITIALIZER;
 								        struct ofpbuf odp_key;
 								        char *pkt_str;
 								        /* Render the flow as an ODP key, then as text. */
 								        ofpbuf_init(&odp_key, 0);
 								        odp_flow_key_from_flow(&odp_key, flow, &wc->masks,
 								                               flow->in_port.odp_port, true);
 								        odp_flow_key_format(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
 								                            &flow_str);
 								        pkt_str = ofp_packet_to_string(ofpbuf_data(buf), ofpbuf_size(buf));
 								        VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
 								                 dpif_upcall_type_to_string(type), ds_cstr(&flow_str),
 								                 pkt_str);
 								        ds_destroy(&flow_str);
 								        free(pkt_str);
 								        ofpbuf_uninit(&odp_key);
 								    }
 								    return dp->upcall_cb(buf, flow, type, userdata, actions, wc,
 								                         put_actions, dp->upcall_aux);
 								}
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								/* Returns the datapath hash of 'packet'.  A stored hash of zero is treated as
 								 * "not yet computed": in that case the hash is obtained from
 								 * miniflow_hash_5tuple('mf', 0), stored back into 'packet', and returned. */
 								static inline uint32_t
 								dpif_netdev_packet_get_dp_hash(struct dpif_packet *packet,
 								                               const struct miniflow *mf)
 								{
 								    uint32_t dp_hash = dpif_packet_get_dp_hash(packet);
 								    if (OVS_UNLIKELY(dp_hash == 0)) {
 								        dp_hash = miniflow_hash_5tuple(mf, 0);
 								        dpif_packet_set_dp_hash(packet, dp_hash);
 								    }
 								    return dp_hash;
 								}
-												dpif-netdev: Rename batch_pkt_execute.

The new name "packet_batch" is a bit more straight forward.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:22:03 -07:00
+								struct packet_batch {
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    unsigned int packet_count;
 								    unsigned int byte_count;
 								    uint16_t tcp_flags;
 								    struct dp_netdev_flow *flow;
 								    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
 								    struct pkt_metadata md;
 								};
 								static inline void
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								packet_batch_update(struct packet_batch *batch, struct dpif_packet *packet,
 								                    const struct miniflow *mf)
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								{
 								    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
 								    batch->packets[batch->packet_count++] = packet;
 								    batch->byte_count += ofpbuf_size(&packet->ofpbuf);
 								}
 								static inline void
-												dpif-netdev: Rename batch_pkt_execute.

The new name "packet_batch" is a bit more straight forward.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:22:03 -07:00
+								packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow,
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								                  struct pkt_metadata *md)
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								{
 								    batch->flow = flow;
 								    batch->md = *md;
 								    batch->packet_count = 0;
 								    batch->byte_count = 0;
 								    batch->tcp_flags = 0;
 								}
 								static inline void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								packet_batch_execute(struct packet_batch *batch,
 								                     struct dp_netdev_pmd_thread *pmd)
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								{
 								    struct dp_netdev_actions *actions;
 								    struct dp_netdev_flow *flow = batch->flow;
 								    dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count,
 								                        batch->tcp_flags);
 								    actions = dp_netdev_flow_get_actions(flow);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    dp_netdev_execute_actions(pmd, batch->packets, batch->packet_count, true,
 								                              &batch->md, actions->actions, actions->size);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    dp_netdev_count_packet(pmd->dp, DP_STAT_HIT, batch->packet_count);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								}
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								/* Adds 'pkt' to the batch in 'batches' whose flow is 'flow', opening a new
 								 * batch (initialized with 'md') when none of the '*n_batches' existing ones
 								 * matches.
 								 *
 								 * Returns true if the packet was queued.  Returns false, leaving 'batches'
 								 * and '*n_batches' untouched, if 'flow' is null or if a new batch would be
 								 * needed but 'max_batches' batches are already in use. */
 								static inline bool
 								dp_netdev_queue_batches(struct dpif_packet *pkt, struct pkt_metadata *md,
 								                        struct dp_netdev_flow *flow, const struct miniflow *mf,
 								                        struct packet_batch *batches, size_t *n_batches,
 								                        size_t max_batches)
 								{
 								    struct packet_batch *fresh;
 								    size_t i;
 								    if (OVS_UNLIKELY(!flow)) {
 								        return false;
 								    }
 								    /* XXX: This O(n^2) algorithm makes sense if we're operating under the
 								     * assumption that the number of distinct flows (and therefore the
 								     * number of distinct batches) is quite small.  If this turns out not
 								     * to be the case, it may make sense to pre sort based on the
 								     * netdev_flow pointer.  That done we can get the appropriate batching
 								     * in O(n * log(n)) instead. */
 								    /* Scan from the most recently opened batch back to the first. */
 								    for (i = *n_batches; i-- > 0; ) {
 								        if (batches[i].flow == flow) {
 								            packet_batch_update(&batches[i], pkt, mf);
 								            return true;
 								        }
 								    }
 								    if (OVS_UNLIKELY(*n_batches >= max_batches)) {
 								        return false;
 								    }
 								    fresh = &batches[*n_batches];
 								    *n_batches += 1;
 								    packet_batch_init(fresh, flow, md);
 								    packet_batch_update(fresh, pkt, mf);
 								    return true;
 								}
 								/* Exchanges the packet pointers stored at 'a' and 'b'. */
 								static inline void
 								dpif_packet_swap(struct dpif_packet **a, struct dpif_packet **b)
 								{
 								    struct dpif_packet *saved_b = *b;
 								    *b = *a;
 								    *a = saved_b;
 								}
 								/* Tries to process all 'cnt' of the 'packets' using only the exact match
 								 * cache 'flow_cache'.  If no flow is found for a packet 'packets[i]', or if
 								 * there is no matching batch for the packet's flow, the miniflow is copied
 								 * into 'keys' and the packet pointer is moved to the beginning of the
 								 * 'packets' array.
 								 *
 								 * Returns the number of packets that still need to be processed; they have
 								 * been moved to the beginning of the 'packets' array. */
 								static inline size_t
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								emc_processing(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **packets,
 								               size_t cnt, struct pkt_metadata *md,
 								               struct netdev_flow_key *keys)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    struct netdev_flow_key key;
 								    struct packet_batch batches[4];
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct emc_cache *flow_cache = &pmd->flow_cache;
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								    size_t n_batches, i;
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    size_t notfound_cnt = 0;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    n_batches = 0;
 								    miniflow_initialize(&key.flow, key.buf);
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								    for (i = 0; i < cnt; i++) {
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        struct dp_netdev_flow *flow;
 								        uint32_t hash;
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								        if (OVS_UNLIKELY(ofpbuf_size(&packets[i]->ofpbuf) < ETH_HEADER_LEN)) {
 								            dpif_packet_delete(packets[i]);
 								            continue;
 								        }
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        miniflow_extract(&packets[i]->ofpbuf, md, &key.flow);
 								        hash = dpif_netdev_packet_get_dp_hash(packets[i], &key.flow);
 								        flow = emc_lookup(flow_cache, &key.flow, hash);
 								        if (OVS_UNLIKELY(!dp_netdev_queue_batches(packets[i], md,
 								                                                  flow,  &key.flow,
 								                                                  batches, &n_batches,
 								                                                  ARRAY_SIZE(batches)))) {
 								            if (i != notfound_cnt) {
 								                dpif_packet_swap(&packets[i], &packets[notfound_cnt]);
 								            }
 								            keys[notfound_cnt++] = key;
 								        }
 								    }
 								    for (i = 0; i < n_batches; i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        packet_batch_execute(&batches[i], pmd);
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								    }
-												dpif-netdev: Use miniflow as a flow key.

Use miniflow as a flow key in the userspace datapath classifier.  The
miniflow is expanded for upcalls, but for existing datapath flows, the
key need not be expanded.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
											
										
										
											2014-04-18 08:26:57 -07:00
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    return notfound_cnt;
 								}
 								static inline void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								fast_path_processing(struct dp_netdev_pmd_thread *pmd,
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                     struct dpif_packet **packets, size_t cnt,
 								                     struct pkt_metadata *md, struct netdev_flow_key *keys)
 								{
-												dpif-netdev: Avoid variable length array on MSVC.

MSVC does not like variable-length arrays either.

This patch fixes the following errors:

lib/dpif-netdev.c(2272) : error C2057: expected constant expression
lib/dpif-netdev.c(2272) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2272) : error C2133: 'batches' : unknown size
lib/dpif-netdev.c(2273) : error C2057: expected constant expression
lib/dpif-netdev.c(2273) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2273) : error C2133: 'mfs' : unknown size
lib/dpif-netdev.c(2274) : error C2057: expected constant expression
lib/dpif-netdev.c(2274) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2274) : error C2133: 'rules' : unknown size
lib/dpif-netdev.c(2363) : warning C4034: sizeof returns 0
lib/dpif-netdev.c(2381) : error C2057: expected constant expression
lib/dpif-netdev.c(2381) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2381) : error C2133: 'keys' : unknown size
make[2]: *** [lib/dpif-netdev.lo] Error 1

Signed-off-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-01 20:11:54 +00:00
+								#if !defined(__CHECKER__) && !defined(_WIN32)
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    const size_t PKT_ARRAY_SIZE = cnt;
 								#else
-												dpif-netdev: Avoid variable length array on MSVC.

MSVC does not like variable-length arrays either.

This patch fixes the following errors:

lib/dpif-netdev.c(2272) : error C2057: expected constant expression
lib/dpif-netdev.c(2272) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2272) : error C2133: 'batches' : unknown size
lib/dpif-netdev.c(2273) : error C2057: expected constant expression
lib/dpif-netdev.c(2273) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2273) : error C2133: 'mfs' : unknown size
lib/dpif-netdev.c(2274) : error C2057: expected constant expression
lib/dpif-netdev.c(2274) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2274) : error C2133: 'rules' : unknown size
lib/dpif-netdev.c(2363) : warning C4034: sizeof returns 0
lib/dpif-netdev.c(2381) : error C2057: expected constant expression
lib/dpif-netdev.c(2381) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2381) : error C2133: 'keys' : unknown size
make[2]: *** [lib/dpif-netdev.lo] Error 1

Signed-off-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-01 20:11:54 +00:00
+								    /* Sparse or MSVC doesn't like variable length array. */
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
 								#endif
 								    struct packet_batch batches[PKT_ARRAY_SIZE];
 								    const struct miniflow *mfs[PKT_ARRAY_SIZE]; /* NULL at bad packets. */
 								    struct cls_rule *rules[PKT_ARRAY_SIZE];
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev *dp = pmd->dp;
 								    struct emc_cache *flow_cache = &pmd->flow_cache;
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    size_t n_batches, i;
 								    bool any_miss;
 								    for (i = 0; i < cnt; i++) {
 								        mfs[i] = &keys[i].flow;
 								    }
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    any_miss = !classifier_lookup_miniflow_batch(&dp->cls, mfs, rules, cnt);
 								    if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
 								        uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
 								        struct ofpbuf actions, put_actions;
 								        struct match match;
 								        ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
 								        ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);
 								        for (i = 0; i < cnt; i++) {
 								            const struct dp_netdev_flow *netdev_flow;
 								            struct ofpbuf *add_actions;
 								            int error;
 								            if (OVS_LIKELY(rules[i] || !mfs[i])) {
 								                continue;
 								            }
 								            /* It's possible that an earlier slow path execution installed
 								             * the rule this flow needs.  In this case, it's a lot cheaper
 								             * to catch it here than execute a miss. */
 								            netdev_flow = dp_netdev_lookup_flow(dp, mfs[i]);
 								            if (netdev_flow) {
 								                rules[i] = CONST_CAST(struct cls_rule *, &netdev_flow->cr);
 								                continue;
 								            }
 								            miniflow_expand(mfs[i], &match.flow);
 								            ofpbuf_clear(&actions);
 								            ofpbuf_clear(&put_actions);
 								            error = dp_netdev_upcall(dp, packets[i], &match.flow, &match.wc,
 								                                      DPIF_UC_MISS, NULL, &actions,
 								                                      &put_actions);
 								            if (OVS_UNLIKELY(error && error != ENOSPC)) {
 								                continue;
 								            }
 								            /* We can't allow the packet batching in the next loop to execute
 								             * the actions.  Otherwise, if there are any slow path actions,
 								             * we'll send the packet up twice. */
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            dp_netdev_execute_actions(pmd, &packets[i], 1, false, md,
 								                                      ofpbuf_data(&actions),
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								                                      ofpbuf_size(&actions));
 								            add_actions = ofpbuf_size(&put_actions)
 								                ? &put_actions
 								                : &actions;
 								            ovs_mutex_lock(&dp->flow_mutex);
 								            /* XXX: There's a brief race where this flow could have already
 								             * been installed since we last did the flow lookup.  This could be
 								             * solved by moving the mutex lock outside the loop, but that's an
 								             * awful long time to be locking everyone out of making flow
 								             * installs.  If we move to a per-core classifier, it would be
 								             * reasonable. */
 								            if (OVS_LIKELY(error != ENOSPC)
 								                && !dp_netdev_lookup_flow(dp, mfs[i])) {
 								                dp_netdev_flow_add(dp, &match, ofpbuf_data(add_actions),
 								                                   ofpbuf_size(add_actions));
 								            }
 								            ovs_mutex_unlock(&dp->flow_mutex);
 								        }
 								        ofpbuf_uninit(&actions);
 								        ofpbuf_uninit(&put_actions);
 								        fat_rwlock_unlock(&dp->upcall_rwlock);
 								    }
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
 								    n_batches = 0;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    for (i = 0; i < cnt; i++) {
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        struct dpif_packet *packet = packets[i];
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								        struct dp_netdev_flow *flow;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								        if (OVS_UNLIKELY(!rules[i] || !mfs[i])) {
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								            continue;
 								        }
 								        flow = dp_netdev_flow_cast(rules[i]);
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        emc_insert(flow_cache, mfs[i], dpif_packet_get_dp_hash(packet),
 								                   flow);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								        dp_netdev_queue_batches(packet, md, flow, mfs[i], batches, &n_batches,
 								                                ARRAY_SIZE(batches));
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    }
-												dpif-netdev: Batch megaflow lookup.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-06-23 18:28:43 -07:00
+								    for (i = 0; i < n_batches; i++) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        packet_batch_execute(&batches[i], pmd);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
 								}
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								static void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                struct dpif_packet **packets, int cnt, struct pkt_metadata *md)
 								{
-												dpif-netdev: Avoid variable length array on MSVC.

MSVC does not like variable-length arrays either.

This patch fixes the following errors:

lib/dpif-netdev.c(2272) : error C2057: expected constant expression
lib/dpif-netdev.c(2272) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2272) : error C2133: 'batches' : unknown size
lib/dpif-netdev.c(2273) : error C2057: expected constant expression
lib/dpif-netdev.c(2273) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2273) : error C2133: 'mfs' : unknown size
lib/dpif-netdev.c(2274) : error C2057: expected constant expression
lib/dpif-netdev.c(2274) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2274) : error C2133: 'rules' : unknown size
lib/dpif-netdev.c(2363) : warning C4034: sizeof returns 0
lib/dpif-netdev.c(2381) : error C2057: expected constant expression
lib/dpif-netdev.c(2381) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2381) : error C2133: 'keys' : unknown size
make[2]: *** [lib/dpif-netdev.lo] Error 1

Signed-off-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-01 20:11:54 +00:00
+								#if !defined(__CHECKER__) && !defined(_WIN32)
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    const size_t PKT_ARRAY_SIZE = cnt;
 								#else
-												dpif-netdev: Avoid variable length array on MSVC.

MSVC does not like variable-length arrays either.

This patch fixes the following errors:

lib/dpif-netdev.c(2272) : error C2057: expected constant expression
lib/dpif-netdev.c(2272) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2272) : error C2133: 'batches' : unknown size
lib/dpif-netdev.c(2273) : error C2057: expected constant expression
lib/dpif-netdev.c(2273) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2273) : error C2133: 'mfs' : unknown size
lib/dpif-netdev.c(2274) : error C2057: expected constant expression
lib/dpif-netdev.c(2274) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2274) : error C2133: 'rules' : unknown size
lib/dpif-netdev.c(2363) : warning C4034: sizeof returns 0
lib/dpif-netdev.c(2381) : error C2057: expected constant expression
lib/dpif-netdev.c(2381) : error C2466: cannot allocate an array of constant size 0
lib/dpif-netdev.c(2381) : error C2133: 'keys' : unknown size
make[2]: *** [lib/dpif-netdev.lo] Error 1

Signed-off-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-01 20:11:54 +00:00
+								    /* Sparse or MSVC doesn't like variable length array. */
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    enum { PKT_ARRAY_SIZE = NETDEV_MAX_RX_BATCH };
 								#endif
 								    struct netdev_flow_key keys[PKT_ARRAY_SIZE];
 								    size_t newcnt;
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    newcnt = emc_processing(pmd, packets, cnt, md, keys);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    if (OVS_UNLIKELY(newcnt)) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								        fast_path_processing(pmd, packets, newcnt, md, keys);
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread-local) cache, which simply
compares the miniflows of the packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								    }
 								}
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								struct dp_netdev_execute_aux {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to an available cpu
core on that numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so a per-thread key is used to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *pmd;
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								};
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								static void
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
 								                               void *aux)
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    dp->upcall_aux = aux;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    dp->upcall_cb = cb;
 								}
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								static void
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt,
-												dpif-netdev: user space datapath recirculation

Add basic recirculation infrastructure and user space
data path support for it. The following bond mega flow patch will
make use of this infrastructure.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-04 15:36:03 -08:00
+								              struct pkt_metadata *md,
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								              const struct nlattr *a, bool may_steal)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								{
 								    struct dp_netdev_execute_aux *aux = aux_;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    uint32_t *depth = recirc_depth_get();
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_pmd_thread *pmd= aux->pmd;
 								    struct dp_netdev *dp= pmd->dp;
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    int type = nl_attr_type(a);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct dp_netdev_port *p;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    int i;
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    switch ((enum ovs_action_attr)type) {
 								    case OVS_ACTION_ATTR_OUTPUT:
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								        p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
-												dpif-netdev: delete lost packets in dp_execute_cb()

This commit fixes memory leaks in dp_execute_cb() in two cases:
    - when the output port cannot be found
    - when the recirculation depth is exceeded

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-25 11:39:34 -07:00
+								        if (OVS_LIKELY(p)) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								            netdev_send(p->netdev, pmd->core_id, packets, cnt, may_steal);
-												dpif-netdev: delete lost packets in dp_execute_cb()

This commit fixes memory leaks in dp_execute_cb() in two cases:
    - when the output port cannot be found
    - when the recirculation depth is exceeded

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-25 11:39:34 -07:00
+								        } else if (may_steal) {
 								            for (i = 0; i < cnt; i++) {
 								                dpif_packet_delete(packets[i]);
 								            }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        }
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								        break;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    case OVS_ACTION_ATTR_USERSPACE:
 								        if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
 								            const struct nlattr *userdata;
 								            struct ofpbuf actions;
 								            struct flow flow;
-												odp-execute: Refine signatures for odp_execute_actions() callbacks.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-20 12:47:33 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								            userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
 								            ofpbuf_init(&actions, 0);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								            for (i = 0; i < cnt; i++) {
 								                int error;
 								                ofpbuf_clear(&actions);
 								                flow_extract(&packets[i]->ofpbuf, md, &flow);
 								                error = dp_netdev_upcall(dp, packets[i], &flow, NULL,
 								                                         DPIF_UC_ACTION, userdata, &actions,
 								                                         NULL);
 								                if (!error || error == ENOSPC) {
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								                    dp_netdev_execute_actions(pmd, &packets[i], 1, false, md,
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								                                              ofpbuf_data(&actions),
 								                                              ofpbuf_size(&actions));
 								                }
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								                if (may_steal) {
 								                    dpif_packet_delete(packets[i]);
 								                }
-												netdev-dpdk: Fix race condition with DPDK mempools in non pmd threads

DPDK mempools rely on rte_lcore_id() to implement a thread-local cache.
Our non pmd threads had rte_lcore_id() == 0. This allowed concurrent access to
the "thread-local" cache, causing crashes.

This commit resolves the issue with the following changes:

- Every non pmd thread has the same lcore_id (0, for management reasons), which
  is not shared with any pmd thread (lcore_id for pmd threads now start from 1)
- DPDK mbufs must be allocated/freed in pmd threads. When there is the need to
  use mempools in non pmd threads, like in dpdk_do_tx_copy(), a mutex must be
  held.
- The previous change no longer allows us to pass DPDK mbufs to handler
  threads; therefore this commit partially reverts 143859ec63d45e. Now packets
  are copied for upcall processing. We can remove the extra memcpy by
  processing upcalls in the pmd thread itself.

With the introduction of the extra locking, the packet throughput will be lower
in the following cases:

- When using internal (tap) devices with DPDK devices on the same datapath.
  Anyway, to support internal devices efficiently, we needed DPDK KNI devices,
  which will be proper pmd devices and will not need this locking.
- When packets are processed in the slow path by non pmd threads. This overhead
  can be avoided by handling the upcalls directly in pmd threads (a change that
  has already been proposed by Ryan Wilson)

Also, the following two fixes have been introduced:
- In dpdk_free_buf() use rte_pktmbuf_free_seg() instead of rte_mempool_put().
  This allows OVS to run properly with CONFIG_RTE_LIBRTE_MBUF_DEBUG DPDK option
- Do not bulk free mbufs in a transmission queue. They may belong to different
  mempools

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-07-17 14:29:36 -07:00
+								            }
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								            ofpbuf_uninit(&actions);
 								            fat_rwlock_unlock(&dp->upcall_rwlock);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								        }
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								        break;
-												dpif-netdev: user space datapath recirculation

Add basic recirculation infrastructure and user space
data path support for it. The following bond mega flow patch will
make use of this infrastructure.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-04 15:36:03 -08:00
-												dpif-netdev: Move hash function out of the recirc action, into its own action

Currently recirculation action can optionally compute hash. This patch
adds a hash action that is independent of the recirc action, which
no longer computes hash.  For megaflow bond with recirc, the output
to a bond port action will look like:

    hash(hash_l4(0)), recirc(<recirc_id>)

Obviously, a recirculation application that does not depend on the
hash value can just use the recirc action alone.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-04-08 18:42:39 -07:00
+								    case OVS_ACTION_ATTR_HASH: {
 								        const struct ovs_action_hash *hash_act;
 								        uint32_t hash;
 								        hash_act = nl_attr_get(a);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
 								        for (i = 0; i < cnt; i++) {
 								            if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
 								                /* Hash need not be symmetric, nor does it need to include
 								                 * L2 fields. */
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                hash = hash_2words(dpif_packet_get_dp_hash(packets[i]),
 								                                   hash_act->hash_basis);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								            } else {
 								                VLOG_WARN("Unknown hash algorithm specified "
 								                          "for the hash action.");
 								                hash = 2;
 								            }
-												dpif-netdev: Move hash function out of the recirc action, into its own action

Currently recirculation action can optionally compute hash. This patch
adds a hash action that is independent of the recirc action, which
no longer computes hash.  For megaflow bond with recirc, the output
to a bond port action will look like:

    hash(hash_l4(0)), recirc(<recirc_id>)

Obviously, a recirculation application that does not depend on the
hash value can just use the recirc action alone.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-04-08 18:42:39 -07:00
+								            if (!hash) {
 								                hash = 1; /* 0 is not valid */
 								            }
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								            if (i == 0) {
 								                md->dp_hash = hash;
 								            }
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								            dpif_packet_set_dp_hash(packets[i], hash);
-												dpif-netdev: Move hash function out of the recirc action, into its own action

Currently recirculation action can optionally compute hash. This patch
adds a hash action that is independent of the recirc action, which
no longer computes hash.  For megaflow bond with recirc, the output
to a bond port action will look like:

    hash(hash_l4(0)), recirc(<recirc_id>)

Obviously, a recirculation application that does not depend on the
hash value can just use the recirc action alone.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Reviewed-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-04-08 18:42:39 -07:00
+								        }
 								        break;
 								    }
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								    case OVS_ACTION_ATTR_RECIRC:
 								        if (*depth < MAX_RECIRC_DEPTH) {
-												dpif-netdev: user space datapath recirculation

Add basic recirculation infrastructure and user space
data path support for it. The following bond mega flow patch will
make use of this infrastructure.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-04 15:36:03 -08:00
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								            (*depth)++;
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								            for (i = 0; i < cnt; i++) {
 								                struct dpif_packet *recirc_pkt;
 								                struct pkt_metadata recirc_md = *md;
 								                recirc_pkt = (may_steal) ? packets[i]
 								                                    : dpif_packet_clone(packets[i]);
 								                recirc_md.recirc_id = nl_attr_get_u32(a);
 								                /* Hash is private to each packet */
-												packet-dpif: Add dpif_packet_{get, set}_hash()

These functions are used to store the packet hash. 'netdev-dpdk'
automatically sets this value to the RSS hash returned by the
NIC. Other 'netdev's set it to 0 (which is an invalid hash
value), so that callers can compute the hash on their own.

If DPDK support is enabled, struct dpif_packet's member
'dp_hash' is removed and 'pkt.hash.rss' from DPDK mbuf is used

This commit also configures DPDK devices to compute the RSS hash
for UDP and IPv6 packets.

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:42 -07:00
+								                recirc_md.dp_hash = dpif_packet_get_dp_hash(packets[i]);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call the dpif_netdev_execute().
  so, use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								                dp_netdev_input(pmd, &recirc_pkt, 1,
-												dpif-netdev: Exact match cache

Since lookups in the classifier can be pretty expensive,
we introduce this (thread local) cache which simply
compares the miniflows of the packets

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-29 16:06:43 -07:00
+								                                &recirc_md);
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								            }
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								            (*depth)--;
 								            break;
 								        } else {
 								            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
-												dpif-netdev: delete lost packets in dp_execute_cb()

This commit fixes memory leaks in dp_execute_cb() in two cases:
    - when the output port cannot be found
    - when the recirculation depth is exceeded

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-25 11:39:34 -07:00
+								            if (may_steal) {
 								                for (i = 0; i < cnt; i++) {
 								                    dpif_packet_delete(packets[i]);
 								                }
 								            }
-												ofproto/bond: Implement bond megaflow using recirculation

Infrastructure to enable megaflow support for bond ports using
recirculation. This patch adds the following features:
* Generate RECIRC action when bond can benefit from recirculation.
* Populate post recirculation rules in a hidden table. Currently table 254.
* Uses post recirculation rules for bond rebalancing
* A recirculation implementation in dpif-netdev.

The goal of this patch is to be able to megaflow bond outputs and
thus greatly improve performance. However, this patch does not
actually improve the megaflow generation. It is left for a later commit.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-05 15:27:31 -08:00
+								        }
-												dpif-netdev: user space datapath recirculation

Add basic recirculation infrastructure and user space
data path support for it. The following bond mega flow patch will
make use of this infrastructure.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-04 15:36:03 -08:00
+								        break;
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    case OVS_ACTION_ATTR_PUSH_VLAN:
 								    case OVS_ACTION_ATTR_POP_VLAN:
 								    case OVS_ACTION_ATTR_PUSH_MPLS:
 								    case OVS_ACTION_ATTR_POP_MPLS:
 								    case OVS_ACTION_ATTR_SET:
-												lib/odp: Masked set action execution and printing.

Add a new action type OVS_ACTION_ATTR_SET_MASKED, and support for
parsing, printing, and committing them.

Masked set actions add a mask, immediately following the netlink
attribute data, within the netlink attribute itself.  Thus the key
attribute size for a masked set action is exactly double of the
non-masked set action.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-09-05 15:44:19 -07:00
+								    case OVS_ACTION_ATTR_SET_MASKED:
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    case OVS_ACTION_ATTR_SAMPLE:
 								    case OVS_ACTION_ATTR_UNSPEC:
 								    case __OVS_ACTION_ATTR_MAX:
 								        OVS_NOT_REACHED();
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								    }
-												datapath: Move Netlink PID for userspace actions from flows to actions.

Commit b063d9f06 "datapath: Use unicast Netlink sockets for upcalls" that
switched from multicast to unicast Netlink for sending upcalls added a
Netlink PID to each kernel flow, used by OVS_ACTION_ATTR_USERSPACE actions
within the flow as target.

This commit drops this per-flow PID in favor of a per-action PID, because
that is more flexible.  It does not yet make use of this additional
flexibility, so behavior should not change.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Bug #7559.

											
										
										
											2011-10-12 16:24:54 -07:00
+								}
-												datapath: Refactor actions in terms of match fields.

Almost all current actions can be expressed in the form of
push/pop/set <field>, where field is one of the match fields. We can
create three base actions and take a field. This has both a nice
symmetry and avoids inconsistencies where we can match on the vlan
TPID but not set it.
Following patch converts all actions to this new format.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

Bug #7115

											
										
										
											2011-10-21 14:38:54 -07:00
+								static void
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								                          struct dpif_packet **packets, int cnt,
 								                          bool may_steal, struct pkt_metadata *md,
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								                          const struct nlattr *actions, size_t actions_len)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Create multiple pmd threads by default.

With this commit, ovs by default will create one pmd thread
for each numa node and pin the pmd thread to available cpu
core on the numa node.

NON_PMD_CORE_ID (currently 0) is used to reserve a particular
cpu core for the I/O of all non-pmd threads.  No pmd thread
can be pinned to this reserved core.

As side-effects of this commit:

-  pmd thread will not be created, if there is no dpdk interface
   from the corresponding numa node added to ovs.

- the exact-match cache for non-pmd threads is removed from
  'struct dp_netdev'.  Instead, all non-pmd threads will use
  the exact-match cache defined in the 'struct dp_netdev_pmd_thread'
  for NON_PMD_CORE_ID.

- the rx packet processing functions are refactored to use
  'struct dp_netdev_pmd_thread' as input.

- the 'netdev_send()' function will be called with the proper
  queue id.

- both pmd and non-pmd threads can call dpif_netdev_execute(),
  so use a per-thread key to help recognize the calling thread.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>


											
										
										
											2014-09-05 14:14:20 -07:00
+								    struct dp_netdev_execute_aux aux = {pmd};
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
-												dpif-netdev: batch packet processing

This change in dpif-netdev allows faster packet processing for devices which
implement batching (netdev-dpdk currently).

Signed-off-by: Daniele Di Proietto <ddiproietto@vmware.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-06-23 11:43:59 -07:00
+								    odp_execute_actions(&aux, packets, cnt, may_steal, md, actions,
 								                        actions_len, dp_execute_cb);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								const struct dpif_class dpif_netdev_class = {
 								    "netdev",
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    dpif_netdev_enumerate,
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								    dpif_netdev_port_open_type,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_open,
 								    dpif_netdev_close,
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								    dpif_netdev_destroy,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the remaining devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    dpif_netdev_run,
 								    dpif_netdev_wait,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_get_stats,
 								    dpif_netdev_port_add,
 								    dpif_netdev_port_del,
 								    dpif_netdev_port_query_by_number,
 								    dpif_netdev_port_query_by_name,
-												datapath: Move Netlink PID for userspace actions from flows to actions.

Commit b063d9f06 "datapath: Use unicast Netlink sockets for upcalls" that
switched from multicast to unicast Netlink for sending upcalls added a
Netlink PID to each kernel flow, used by OVS_ACTION_ATTR_USERSPACE actions
within the flow as target.

This commit drops this per-flow PID in favor of a per-action PID, because
that is more flexible.  It does not yet make use of this additional
flexibility, so behavior should not change.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Bug #7559.

											
										
										
											2011-10-12 16:24:54 -07:00
+								    NULL,                       /* port_get_pid */
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    dpif_netdev_port_dump_start,
 								    dpif_netdev_port_dump_next,
 								    dpif_netdev_port_dump_done,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_port_poll,
 								    dpif_netdev_port_poll_wait,
 								    dpif_netdev_flow_flush,
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    dpif_netdev_flow_dump_create,
 								    dpif_netdev_flow_dump_destroy,
 								    dpif_netdev_flow_dump_thread_create,
 								    dpif_netdev_flow_dump_thread_destroy,
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    dpif_netdev_flow_dump_next,
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								    dpif_netdev_operate,
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    NULL,                       /* recv_set */
 								    NULL,                       /* handlers_set */
-												dpif-netdev: Allow enqueue actions.

The dpif-netdev implementation disallowed enqueue actions because
it did not support conversion from OVS 'queue_id' to dpif
'priority'.  For testing purposes, this patch allows queues which
translate into NOOPs.

											
										
										
											2011-11-21 13:36:17 -08:00
+								    dpif_netdev_queue_to_priority,
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    NULL,                       /* recv */
 								    NULL,                       /* recv_wait */
 								    NULL,                       /* recv_purge */
 								    dpif_netdev_register_upcall_cb,
 								    dpif_netdev_enable_upcall,
 								    dpif_netdev_disable_upcall,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								static void
 								dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                              const char *argv[], void *aux OVS_UNUSED)
 								{
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    struct dp_netdev_port *old_port;
 								    struct dp_netdev_port *new_port;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    struct dp_netdev *dp;
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    odp_port_t port_no;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    dp = shash_find_data(&dp_netdevs, argv[1]);
 								    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        ovs_mutex_unlock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
 								        return;
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_refcount_ref(&dp->ref_cnt);
 								    ovs_mutex_unlock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_lock(&dp->port_mutex);
 								    if (get_port_by_name(dp, argv[2], &old_port)) {
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "unknown port");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    port_no = u32_to_odp(atoi(argv[3]));
 								    if (!port_no || port_no == ODPP_NONE) {
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "bad port number");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    if (dp_netdev_lookup_port(dp, port_no)) {
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "port number already in use");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
 								    /* Remove old port. */
 								    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
 								    ovsrcu_postpone(free, old_port);
 								    /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
 								    new_port = xmemdup(old_port, sizeof *old_port);
 								    new_port->port_no = port_no;
 								    cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    unixctl_command_reply(conn, NULL);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								exit:
-												dpif-netdev: Use cmap for ports.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-05-20 13:21:09 -07:00
+								    ovs_mutex_unlock(&dp->port_mutex);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    dp_netdev_unref(dp);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								}
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								static void
 								dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                       const char *argv[], void *aux OVS_UNUSED)
 								{
 								    /* unixctl handler: argv[1] names a datapath and argv[2] names the port
 								     * to delete from it.  Exactly one reply is sent on 'conn': an error if
 								     * the datapath is not found or is not a dummy, if the port lookup
 								     * fails, or if the port is ODPP_LOCAL; otherwise an empty success
 								     * reply after do_del_port(). */
 								    const char *error = NULL;
 								    struct dp_netdev_port *target;
 								    struct dp_netdev *dp;
 								    bool usable;
 								    /* Look up the datapath and, if it qualifies, take a reference while
 								     * 'dp_netdev_mutex' is still held.  The reference is released by
 								     * dp_netdev_unref() at the end of the function. */
 								    ovs_mutex_lock(&dp_netdev_mutex);
 								    dp = shash_find_data(&dp_netdevs, argv[1]);
 								    usable = dp && dpif_netdev_class_is_dummy(dp->class);
 								    if (usable) {
 								        ovs_refcount_ref(&dp->ref_cnt);
 								    }
 								    ovs_mutex_unlock(&dp_netdev_mutex);
 								    if (!usable) {
 								        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
 								        return;
 								    }
 								    /* Port lookup, deletion and the reply all happen under 'port_mutex'. */
 								    ovs_mutex_lock(&dp->port_mutex);
 								    if (get_port_by_name(dp, argv[2], &target)) {
 								        error = "unknown port";
 								    } else if (target->port_no == ODPP_LOCAL) {
 								        error = "can't delete local port";
 								    } else {
 								        do_del_port(dp, target);
 								    }
 								    if (error) {
 								        unixctl_command_reply_error(conn, error);
 								    } else {
 								        unixctl_command_reply(conn, NULL);
 								    }
 								    ovs_mutex_unlock(&dp->port_mutex);
 								    dp_netdev_unref(dp);
 								}
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								static void
 								dpif_dummy_register__(const char *type)
 								{
 								    /* Registers a heap-allocated copy of 'dpif_netdev_class' as a provider
 								     * whose 'type' member is a private copy of the 'type' string.  The
 								     * copy is passed to dp_register_provider() and is not freed here. */
 								    struct dpif_class *dummy_class = xmalloc(sizeof *dummy_class);
 								    *dummy_class = dpif_netdev_class;
 								    dummy_class->type = xstrdup(type);
 								    dp_register_provider(dummy_class);
 								}
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								void
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								dpif_dummy_register(bool override)
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								{
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    if (override) {
 								        struct sset types;
 								        const char *type;
 								        sset_init(&types);
 								        dp_enumerate_types(&types);
 								        SSET_FOR_EACH (type, &types) {
 								            if (!dp_unregister_provider(type)) {
 								                dpif_dummy_register__(type);
 								            }
 								        }
 								        sset_destroy(&types);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								    }
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
 								    dpif_dummy_register__("dummy");
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
 								    unixctl_command_register("dpif-dummy/change-port-number",
-												unixctl: Make command description all lowercase.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-22 16:27:22 -07:00
+								                             "dp port new-number",
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								                             3, 3, dpif_dummy_change_port_number, NULL);
-												unixctl: Make command description all lowercase.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-22 16:27:22 -07:00
+								    unixctl_command_register("dpif-dummy/delete-port", "dp port",
-												bridge: Add test that ports that disappear get added back to the datapath.

The test added in this commit would have caught the bug fixed by commit
96be8de595150 (bridge: When ports disappear from a datapath, add them
back.).  With that commit reverted, the new test fails.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Gurucharan Shetty <gshetty@nicira.com>

											
										
										
											2014-05-22 09:36:00 -07:00
+								                             2, 2, dpif_dummy_delete_port, NULL);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								}